1.1 MiB
1.1 MiB
In [1]:
import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt %matplotlib inline
In [2]:
import matplotlib.pyplot as plt plt.rcParams["font.sans-serif"]=["SimHei"] #设置字体 plt.rcParams["axes.unicode_minus"]=False #该语句解决图像中的“-”负号的乱码问题
In [3]:
data_path = "./data/煤质碳材料数据.xlsx"
In [4]:
data = pd.read_excel(data_path)
In [5]:
# 缺失值填充 ## 根据煤种分类,将缺失值进行补全,采用均值填充 ## 用不上,因为分析数据,X值缺失的时候,Y也会缺失 ## 萃取中级烟煤还只有一条数据,无法参考补全 ## 直接删除X缺失数据 # 补全: # grouped = data.groupby('煤种') # # 定义填充函数 # def fill_with_mean(group, name): # group[name].fillna(group[name].mean(), inplace=True) # return group # # 在每个分组内使用均值法填充空值 # data_filled = grouped.apply(fill_with_mean,"分析水Mad") # 删除某一列 data_full = data.dropna(axis=0,subset = ['编号', '煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']) del data_full['编号']
In [1]:
import matplotlib as mpl print(mpl.get_cachedir())
/root/.cache/matplotlib
In [6]:
data_full = data_full.reset_index(drop=True)
In [ ]:
# one-hot 编码处理煤种数据 #from sklearn.preprocessing import OneHotEncoder # encoder = OneHotEncoder() # encoded_data = encoder.fit_transform(data_full[['煤种']]) # # 将稀疏矩阵转换为数组 # encoded_array = encoded_data.toarray() # # 创建编码后的 DataFrame # encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(['煤种'])) # data_full_one_hot = pd.concat([data_full, encoded_df], axis=1) # del data_full_one_hot['煤种']
In [8]:
# 字典映射后准备归一化 from sklearn.preprocessing import LabelEncoder encoder = LabelEncoder() encoded_labels = encoder.fit_transform(data_full['煤种']) data_full['煤种'] = encoded_labels
In [9]:
# 归一化,数据量太小了,且数据无正态分布一说感觉,直接归一化 x_col = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'] y_col = ['孔体积(cm3/g)','微孔体积(cm3/g)', '介孔体积(cm3/g)','BET比表面积(m2/g)'] # from sklearn.preprocessing import StandardScaler # scaler = StandardScaler() # normalized_data = scaler.fit_transform(data_full[x_col]) from sklearn.preprocessing import MinMaxScaler scaler = MinMaxScaler() normalized_data = scaler.fit_transform(data_full[x_col]) normalized_df = pd.DataFrame(normalized_data, columns=x_col) data_full_minmax = pd.concat([normalized_df,data_full[y_col]],axis=1)
In [10]:
data_full_minmax
Out[10]:
煤种 | 分析水Mad | 灰分 | 挥发分 | 碳 | 氢 | 氮 | 硫 | 氧 | 碳化温度(℃) | 升温速率(℃/min) | 保温时间(h) | KOH | K2CO3 | 孔体积(cm3/g) | 微孔体积(cm3/g) | 介孔体积(cm3/g) | BET比表面积(m2/g) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.090909 | 0.040520 | 0.176027 | 0.579416 | 0.897402 | 0.181024 | 0.333333 | 0.000000 | 0.141774 | 1.0 | 0.000000 | 0.6 | 0.0 | 0.0 | 0.270 | NaN | NaN | 296.0 |
1 | 0.727273 | 0.436127 | 0.089271 | 0.755583 | 0.552794 | 0.131548 | 0.245763 | 1.000000 | 0.670623 | 0.1 | 0.285714 | 0.0 | 1.0 | 0.0 | 0.356 | 0.289 | 0.067 | 665.0 |
2 | 0.727273 | 0.436127 | 0.089271 | 0.755583 | 0.552794 | 0.131548 | 0.245763 | 1.000000 | 0.670623 | 0.1 | 0.285714 | 0.0 | 1.0 | 0.0 | 0.608 | 0.482 | 0.126 | 1221.0 |
3 | 0.727273 | 0.436127 | 0.089271 | 0.755583 | 0.552794 | 0.131548 | 0.245763 | 1.000000 | 0.670623 | 0.1 | 0.285714 | 0.0 | 1.0 | 0.0 | 1.438 | 0.670 | 0.768 | 2609.0 |
4 | 0.727273 | 0.436127 | 0.089271 | 0.755583 | 0.552794 | 0.131548 | 0.245763 | 1.000000 | 0.670623 | 0.1 | 0.285714 | 0.0 | 1.0 | 0.0 | 1.321 | 0.599 | 0.722 | 2323.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
64 | 0.272727 | 0.000000 | 0.085080 | 0.151960 | 0.998131 | 0.096042 | 0.378531 | 0.058989 | 0.000000 | 0.4 | 0.107143 | 0.2 | 1.0 | 0.0 | 1.608 | 1.204 | 0.404 | 3142.0 |
65 | 0.272727 | 0.000000 | 0.085080 | 0.151960 | 0.998131 | 0.096042 | 0.378531 | 0.058989 | 0.000000 | 0.4 | 0.107143 | 0.2 | 1.0 | 0.0 | 2.041 | 1.022 | 1.019 | 3389.0 |
66 | 0.272727 | 0.002165 | 0.174560 | 0.137279 | 1.000000 | 0.000000 | 0.457627 | 0.000000 | 0.048797 | 0.2 | 0.107143 | 0.2 | 1.0 | 0.0 | 1.135 | 0.916 | 0.219 | 2542.0 |
67 | 0.272727 | 0.002165 | 0.174560 | 0.137279 | 1.000000 | 0.000000 | 0.457627 | 0.000000 | 0.048797 | 0.4 | 0.107143 | 0.2 | 1.0 | 0.0 | 1.219 | 0.947 | 0.272 | 2665.0 |
68 | 0.272727 | 0.002165 | 0.174560 | 0.137279 | 1.000000 | 0.000000 | 0.457627 | 0.000000 | 0.048797 | 0.6 | 0.107143 | 0.2 | 1.0 | 0.0 | 1.473 | 0.718 | 0.755 | 2947.0 |
69 rows × 18 columns
四套数据集¶
In [12]:
data_1=data_full_minmax.drop(columns=['孔体积(cm3/g)','微孔体积(cm3/g)', '介孔体积(cm3/g)']) data_1 = data_1.dropna(axis=0, subset=['BET比表面积(m2/g)'])
In [13]:
train_corr = data_1.corr() ax = plt.subplots(figsize=(20,16)) ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
In [14]:
data_2 = data_full_minmax.drop(columns=['BET比表面积(m2/g)','微孔体积(cm3/g)', '介孔体积(cm3/g)']) data_2 = data_2.dropna(axis=0, subset=['孔体积(cm3/g)'])
In [15]:
train_corr = data_2.corr() ax = plt.subplots(figsize=(20,16)) ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
In [16]:
data_3 = data_full_minmax.drop(columns=['BET比表面积(m2/g)','孔体积(cm3/g)', '介孔体积(cm3/g)']) data_3 = data_3.dropna(axis=0, subset=['微孔体积(cm3/g)'])
In [17]:
train_corr = data_3.corr() ax = plt.subplots(figsize=(20,16)) ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
In [18]:
data_4 = data_full_minmax.drop(columns=['BET比表面积(m2/g)','孔体积(cm3/g)', '微孔体积(cm3/g)']) data_4 = data_4.dropna(axis=0, subset=['介孔体积(cm3/g)'])
In [19]:
train_corr = data_4.corr() ax = plt.subplots(figsize=(20,16)) ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
BET比表面积(m2/g)¶
In [20]:
from sklearn.model_selection import train_test_split X = data_1[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']] y = data_1['BET比表面积(m2/g)'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
线性回归¶
In [21]:
from sklearn.linear_model import LinearRegression clf = LinearRegression() clf.fit(X_train, y_train) test_pred = clf.predict(X_test)
In [22]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score # 计算均方误差(MSE) def get_acc(y_test, test_pred): mse = mean_squared_error(y_test,test_pred) print("Mean Squared Error:", mse) # 计算均方根误差(RMSE) rmse = np.sqrt(mse) print("Root Mean Squared Error:", rmse) # 计算平均绝对误差(MAE) mae = mean_absolute_error(y_test,test_pred) print("Mean Absolute Error:", mae) # 计算决定系数(R²) r2 = r2_score(y_test,test_pred) print("Coefficient of Determination (R²):", r2)
In [23]:
get_acc(y_test,test_pred)
Mean Squared Error: 356346.0854506917 Root Mean Squared Error: 596.9473054220881 Mean Absolute Error: 421.32564458570505 Coefficient of Determination (R²): 0.6570769358934058
In [24]:
coefficients = clf.coef_ key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'] for index,i in enumerate(coefficients): print(key[index] + ':' + str(i))
煤种:-877.2972339819474 分析水Mad:1114.112507797246 灰分:-1080.5492474856756 挥发分:-1014.5682008519212 碳:2502.1689304953256 氢:1547.1118343327066 氮:-310.53231471047394 硫:622.2484614557459 氧:1735.089974709626 碳化温度(℃):949.2902933052904 升温速率(℃/min):16.004040022910374 保温时间(h):999.0720137977573 KOH:1593.2510277610688 K2CO3:-284.0768698715867
LightGBM¶
In [25]:
import lightgbm as lgb # 创建 LightGBM 数据集 train_data = lgb.Dataset(X_train, label=y_train) test_data = lgb.Dataset(X_test, label=y_test, reference=train_data) # 设置参数 params = { 'objective': 'regression', # 回归问题 'boosting_type': 'gbdt', # 使用梯度提升 'metric': 'rmse', # 使用均方根误差作为评估指标 'num_leaves': 2, # 每棵树的叶子节点数 'learning_rate': 0.01, # 学习率 } # 训练模型 #定义callback回调 callback=[lgb.early_stopping(stopping_rounds=10,verbose=True), lgb.log_evaluation(period=10,show_stdv=True)] # 训练 train m1 = lgb.train(params,train_data,num_boost_round=200, valid_sets=[train_data,test_data],callbacks=callback) # 进行预测 y_pred = m1.predict(X_test, num_iteration=m1.best_iteration) get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000416 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Info] Total Bins 112 [LightGBM] [Info] Number of data points in the train set: 58, number of used features: 11 [LightGBM] [Info] Start training from score 2093.679985 Training until validation scores don't improve for 10 rounds [10] training's rmse: 963.582 valid_1's rmse: 1065.25 [20] training's rmse: 951.788 valid_1's rmse: 1058.77 [30] training's rmse: 941.053 valid_1's rmse: 1052.87 [40] training's rmse: 931.293 valid_1's rmse: 1047.48 [50] training's rmse: 922.404 valid_1's rmse: 1041.26 [60] training's rmse: 914.075 valid_1's rmse: 1032.99 [70] training's rmse: 906.208 valid_1's rmse: 1025.87 [80] training's rmse: 898.771 valid_1's rmse: 1017.92 [90] training's rmse: 891.757 valid_1's rmse: 1011.48 [100] training's rmse: 885.133 valid_1's rmse: 1004.27 [110] training's rmse: 878.885 valid_1's rmse: 997.768 [120] training's rmse: 872.995 valid_1's rmse: 992.232 [130] training's rmse: 867.436 valid_1's rmse: 987.243 [140] training's rmse: 862.191 valid_1's rmse: 984.017 [150] training's rmse: 857.24 valid_1's rmse: 979.536 [160] training's rmse: 852.571 valid_1's rmse: 975.56 [170] training's rmse: 848.168 valid_1's rmse: 971.554 [180] training's rmse: 844 valid_1's rmse: 969.917 [190] training's rmse: 840.022 valid_1's rmse: 966.922 [200] training's rmse: 836.228 valid_1's rmse: 965.609 Did not meet early stopping. Best iteration is: [200] training's rmse: 836.228 valid_1's rmse: 965.609 Mean Squared Error: 932401.3299617766 Root Mean Squared Error: 965.609305030651 Mean Absolute Error: 803.9783213816435 Coefficient of Determination (R²): 0.1027208264595928
In [26]:
plt.figure(figsize=(12,6)) lgb.plot_importance(m1, max_num_features=30) plt.title("Featurertances") plt.show()
<Figure size 1200x600 with 0 Axes>
深度学习¶
In [27]:
import torch from torch.utils.data import TensorDataset, DataLoader from sklearn.model_selection import train_test_split import torch.nn as nn import torch.optim as optim from sklearn.metrics import mean_squared_error
In [49]:
X = data_1[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']] y = data_1['BET比表面积(m2/g)']
In [50]:
x_tensor = torch.Tensor(X.values) # 将训练数据 x 转换为张量 y_tensor = torch.Tensor(y.values).reshape(-1,1) # 将标签数据 y 转换为张量
In [51]:
#dataset = TensorDataset(x_tensor, y_tensor) test_size = 0.15 # 设置测试集的比例 random_seed = 42 # 设置随机种子以确保可重复性 x_train, x_test, y_train, y_test = train_test_split(x_tensor, y_tensor, test_size=test_size, random_state=random_seed)
In [56]:
scaler = MinMaxScaler() scaler.fit(y_train) y_train = torch.Tensor(scaler.transform(y_train)) y_test = torch.Tensor(scaler.transform(y_test))
In [61]:
batch_size = 4 # 设置每个批次的大小 train_loader = DataLoader(TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True) test_loader = DataLoader(TensorDataset(x_test, y_test), batch_size=batch_size) # drop_last=True
In [38]:
# for i in range(2): # for inputs, labels in train_loader: # print(labels) # break
In [89]:
# 定义一个简单的前馈神经网络 class Net(nn.Module): def __init__(self, input_size, hidden_size, output_size): super(Net, self).__init__() self.fc1 = nn.Linear(input_size, hidden_size) self.relu = nn.ReLU() self.sigmoid = nn.Sigmoid() self.fc2 = nn.Linear(hidden_size, 10) self.fc3 = nn.Linear(10,output_size) def forward(self, x): x = self.fc1(x) x = self.sigmoid(x) x = self.fc2(x) x = self.sigmoid(x) x = self.fc3(x) return x # 定义训练参数 input_size = 14 hidden_size = 40 output_size = 1 learning_rate = 0.01 num_epochs = 100 # 创建模型实例 model = Net(input_size, hidden_size, output_size) # 定义损失函数和优化器 criterion = nn.MSELoss() # 使用均方误差损失 optimizer = optim.Adam(model.parameters(), lr=learning_rate) # 训练回归模型 num_epochs = 500 for epoch in range(num_epochs): model.train() for inputs, labels in train_loader: optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')
Epoch [1/500], Loss: 0.1375112235546112 Epoch [2/500], Loss: 0.05806560814380646 Epoch [3/500], Loss: 0.09772428125143051 Epoch [4/500], Loss: 0.021364646032452583 Epoch [5/500], Loss: 0.01773611083626747 Epoch [6/500], Loss: 0.021522678434848785 Epoch [7/500], Loss: 0.008145670406520367 Epoch [8/500], Loss: 0.06583523005247116 Epoch [9/500], Loss: 0.01977309212088585 Epoch [10/500], Loss: 0.023928888142108917 Epoch [11/500], Loss: 0.027664322406053543 Epoch [12/500], Loss: 0.0042370702140033245 Epoch [13/500], Loss: 0.02011457458138466 Epoch [14/500], Loss: 0.0015053569804877043 Epoch [15/500], Loss: 0.03043498657643795 Epoch [16/500], Loss: 0.018061762675642967 Epoch [17/500], Loss: 0.03032355196774006 Epoch [18/500], Loss: 0.003073741216212511 Epoch [19/500], Loss: 0.030238714069128036 Epoch [20/500], Loss: 0.013475775718688965 Epoch [21/500], Loss: 0.0019919639453291893 Epoch [22/500], Loss: 0.0721120536327362 Epoch [23/500], Loss: 0.018716545775532722 Epoch [24/500], Loss: 0.03277812898159027 Epoch [25/500], Loss: 0.0069098640233278275 Epoch [26/500], Loss: 0.02530064806342125 Epoch [27/500], Loss: 0.031360749155282974 Epoch [28/500], Loss: 0.007204529829323292 Epoch [29/500], Loss: 0.008599984459578991 Epoch [30/500], Loss: 0.003325084922835231 Epoch [31/500], Loss: 0.022524351254105568 Epoch [32/500], Loss: 0.014607256278395653 Epoch [33/500], Loss: 0.018749907612800598 Epoch [34/500], Loss: 0.0009139125468209386 Epoch [35/500], Loss: 0.012738749384880066 Epoch [36/500], Loss: 0.033581215888261795 Epoch [37/500], Loss: 0.016028260812163353 Epoch [38/500], Loss: 0.020110584795475006 Epoch [39/500], Loss: 0.02486024796962738 Epoch [40/500], Loss: 0.002677252981811762 Epoch [41/500], Loss: 0.012438181787729263 Epoch [42/500], Loss: 0.01628699153661728 Epoch [43/500], Loss: 0.028455808758735657 Epoch [44/500], Loss: 0.001482177060097456 Epoch [45/500], Loss: 0.006345514673739672 Epoch [46/500], Loss: 0.03298439830541611 Epoch [47/500], Loss: 0.05287860333919525 Epoch [48/500], Loss: 0.0027091410011053085 Epoch [49/500], Loss: 0.0118508730083704 Epoch [50/500], Loss: 0.0038459282368421555 Epoch [51/500], Loss: 0.024839868769049644 Epoch [52/500], Loss: 0.0030711244326084852 Epoch [53/500], Loss: 0.041477471590042114 Epoch [54/500], Loss: 0.04405653104186058 Epoch [55/500], Loss: 0.0023413999006152153 Epoch [56/500], Loss: 0.005603765603154898 Epoch [57/500], Loss: 0.05010407418012619 Epoch [58/500], Loss: 0.002134432550519705 Epoch [59/500], Loss: 0.0016706496244296432 Epoch [60/500], Loss: 0.0031203352846205235 Epoch [61/500], Loss: 0.022609718143939972 Epoch [62/500], Loss: 0.007998031564056873 Epoch [63/500], Loss: 0.03089986741542816 Epoch [64/500], Loss: 0.02436087280511856 Epoch [65/500], Loss: 0.0439436249434948 Epoch [66/500], Loss: 0.009039153344929218 Epoch [67/500], Loss: 0.03196775168180466 Epoch [68/500], Loss: 0.0014594022650271654 Epoch [69/500], Loss: 0.003043722826987505 Epoch [70/500], Loss: 0.02321109175682068 Epoch [71/500], Loss: 0.001104667317122221 Epoch [72/500], Loss: 0.0014685456408187747 Epoch [73/500], Loss: 0.010093788616359234 Epoch [74/500], Loss: 0.025984715670347214 Epoch [75/500], Loss: 0.055412407964468 Epoch [76/500], Loss: 0.027970263734459877 Epoch [77/500], Loss: 0.00738972332328558 Epoch [78/500], Loss: 0.0005837245844304562 Epoch [79/500], Loss: 0.002833475824445486 Epoch [80/500], Loss: 0.039277084171772 Epoch [81/500], Loss: 0.006304681301116943 Epoch [82/500], Loss: 0.027889391407370567 Epoch [83/500], Loss: 0.04046902805566788 Epoch [84/500], Loss: 0.0026628756895661354 Epoch [85/500], Loss: 0.00805905181914568 Epoch [86/500], Loss: 0.039849903434515 Epoch [87/500], Loss: 0.0007797772996127605 Epoch [88/500], Loss: 0.0935254693031311 Epoch [89/500], Loss: 0.02649867907166481 Epoch [90/500], Loss: 0.004896295256912708 Epoch [91/500], Loss: 0.0026272933464497328 Epoch [92/500], Loss: 0.04471985623240471 Epoch [93/500], Loss: 0.005443623289465904 Epoch [94/500], Loss: 0.006703123450279236 Epoch [95/500], Loss: 0.03889564424753189 Epoch [96/500], Loss: 0.002041529631242156 Epoch [97/500], Loss: 0.022731401026248932 Epoch [98/500], Loss: 0.02479637786746025 Epoch [99/500], Loss: 0.002792914630845189 Epoch [100/500], Loss: 0.01177084632217884 Epoch [101/500], Loss: 0.0010617190273478627 Epoch [102/500], Loss: 0.006667059846222401 Epoch [103/500], Loss: 0.06580355018377304 Epoch [104/500], Loss: 0.11343423277139664 Epoch [105/500], Loss: 0.005578789860010147 Epoch [106/500], Loss: 0.0020729107782244682 Epoch [107/500], Loss: 0.0024541851598769426 Epoch [108/500], Loss: 0.04115631431341171 Epoch [109/500], Loss: 0.0013442374765872955 Epoch [110/500], Loss: 0.0062351468950510025 Epoch [111/500], Loss: 0.0005676917498931289 Epoch [112/500], Loss: 0.02799421362578869 Epoch [113/500], Loss: 0.0011389830615371466 Epoch [114/500], Loss: 0.0016039859037846327 Epoch [115/500], Loss: 0.0019230787875130773 Epoch [116/500], Loss: 0.0062424009665846825 Epoch [117/500], Loss: 0.01075577363371849 Epoch [118/500], Loss: 0.17046409845352173 Epoch [119/500], Loss: 0.008756555616855621 Epoch [120/500], Loss: 0.02538108266890049 Epoch [121/500], Loss: 0.0027733466122299433 Epoch [122/500], Loss: 0.002166420454159379 Epoch [123/500], Loss: 0.0005181884625926614 Epoch [124/500], Loss: 0.002128859516233206 Epoch [125/500], Loss: 0.048927824944257736 Epoch [126/500], Loss: 0.006466357968747616 Epoch [127/500], Loss: 0.0013223910937085748 Epoch [128/500], Loss: 0.016330869868397713 Epoch [129/500], Loss: 0.0340036004781723 Epoch [130/500], Loss: 0.005737701430916786 Epoch [131/500], Loss: 0.043695010244846344 Epoch [132/500], Loss: 0.02377896197140217 Epoch [133/500], Loss: 0.03224272280931473 Epoch [134/500], Loss: 0.0022535149473696947 Epoch [135/500], Loss: 0.005497700069099665 Epoch [136/500], Loss: 0.0016764058964326978 Epoch [137/500], Loss: 0.013641731813549995 Epoch [138/500], Loss: 0.03554676100611687 Epoch [139/500], Loss: 0.002655998570844531 Epoch [140/500], Loss: 0.014379335567355156 Epoch [141/500], Loss: 0.0004798714362550527 Epoch [142/500], Loss: 0.022848913446068764 Epoch [143/500], Loss: 0.003569053253158927 Epoch [144/500], Loss: 0.0010363985784351826 Epoch [145/500], Loss: 0.0011998001718893647 Epoch [146/500], Loss: 0.029332881793379784 Epoch [147/500], Loss: 0.0021700109355151653 Epoch [148/500], Loss: 0.04989006742835045 Epoch [149/500], Loss: 0.017596838995814323 Epoch [150/500], Loss: 0.005552900023758411 Epoch [151/500], Loss: 0.02130822092294693 Epoch [152/500], Loss: 0.013184739276766777 Epoch [153/500], Loss: 0.003693445585668087 Epoch [154/500], Loss: 0.0002814586041495204 Epoch [155/500], Loss: 0.011256406083703041 Epoch [156/500], Loss: 0.0007573600159958005 Epoch [157/500], Loss: 0.0019472946878522635 Epoch [158/500], Loss: 0.015084566548466682 Epoch [159/500], Loss: 0.006373480428010225 Epoch [160/500], Loss: 0.0014554281951859593 Epoch [161/500], Loss: 0.003039491828531027 Epoch [162/500], Loss: 0.0013355282135307789 Epoch [163/500], Loss: 0.0038741338066756725 Epoch [164/500], Loss: 0.0035878620110452175 Epoch [165/500], Loss: 0.030438825488090515 Epoch [166/500], Loss: 0.0697525143623352 Epoch [167/500], Loss: 0.026584984734654427 Epoch [168/500], Loss: 0.0019321260042488575 Epoch [169/500], Loss: 0.05357321351766586 Epoch [170/500], Loss: 0.016700396314263344 Epoch [171/500], Loss: 0.09351684153079987 Epoch [172/500], Loss: 0.008441533893346786 Epoch [173/500], Loss: 0.016276434063911438 Epoch [174/500], Loss: 0.07126587629318237 Epoch [175/500], Loss: 0.005204909015446901 Epoch [176/500], Loss: 0.003409262513741851 Epoch [177/500], Loss: 0.04996544122695923 Epoch [178/500], Loss: 0.04525388404726982 Epoch [179/500], Loss: 0.009854236617684364 Epoch [180/500], Loss: 0.0035332152619957924 Epoch [181/500], Loss: 0.0025211272295564413 Epoch [182/500], Loss: 0.004975481424480677 Epoch [183/500], Loss: 0.0014112028293311596 Epoch [184/500], Loss: 0.002589487237855792 Epoch [185/500], Loss: 0.0003969790996052325 Epoch [186/500], Loss: 0.07826998084783554 Epoch [187/500], Loss: 0.0003743321285583079 Epoch [188/500], Loss: 0.028815554454922676 Epoch [189/500], Loss: 0.006910406053066254 Epoch [190/500], Loss: 0.0004680223355535418 Epoch [191/500], Loss: 0.0004503509262576699 Epoch [192/500], Loss: 0.0013653915375471115 Epoch [193/500], Loss: 0.06225353106856346 Epoch [194/500], Loss: 0.01694313995540142 Epoch [195/500], Loss: 0.024628346785902977 Epoch [196/500], Loss: 0.008102591149508953 Epoch [197/500], Loss: 0.042955197393894196 Epoch [198/500], Loss: 0.04331319406628609 Epoch [199/500], Loss: 0.04208041727542877 Epoch [200/500], Loss: 0.0032112603075802326 Epoch [201/500], Loss: 0.0031181415542960167 Epoch [202/500], Loss: 0.006705279462039471 Epoch [203/500], Loss: 0.006221733056008816 Epoch [204/500], Loss: 0.06713331490755081 Epoch [205/500], Loss: 0.0023540379479527473 Epoch [206/500], Loss: 0.012740552425384521 Epoch [207/500], Loss: 0.03839072957634926 Epoch [208/500], Loss: 0.0030456939712166786 Epoch [209/500], Loss: 0.012639260850846767 Epoch [210/500], Loss: 0.08555809408426285 Epoch [211/500], Loss: 0.011615005321800709 Epoch [212/500], Loss: 0.004777462687343359 Epoch [213/500], Loss: 0.012780732475221157 Epoch [214/500], Loss: 0.009147602133452892 Epoch [215/500], Loss: 0.0058478908613324165 Epoch [216/500], Loss: 0.04147972911596298 Epoch [217/500], Loss: 0.005171542055904865 Epoch [218/500], Loss: 7.350118539761752e-05 Epoch [219/500], Loss: 0.014487506821751595 Epoch [220/500], Loss: 0.014558564871549606 Epoch [221/500], Loss: 0.004064792767167091 Epoch [222/500], Loss: 0.013005146756768227 Epoch [223/500], Loss: 0.0153804374858737 Epoch [224/500], Loss: 0.0026739821769297123 Epoch [225/500], Loss: 0.021649105474352837 Epoch [226/500], Loss: 4.6764260332565755e-05 Epoch [227/500], Loss: 0.008229969069361687 Epoch [228/500], Loss: 0.03477006033062935 Epoch [229/500], Loss: 0.045727379620075226 Epoch [230/500], Loss: 0.005768945906311274 Epoch [231/500], Loss: 0.03247058391571045 Epoch [232/500], Loss: 0.012503192760050297 Epoch [233/500], Loss: 0.002126991981640458 Epoch [234/500], Loss: 0.010053770616650581 Epoch [235/500], Loss: 0.00034815288381651044 Epoch [236/500], Loss: 0.048214126378297806 Epoch [237/500], Loss: 0.00026693425024859607 Epoch [238/500], Loss: 2.02381270355545e-05 Epoch [239/500], Loss: 0.005556574556976557 Epoch [240/500], Loss: 0.03920856863260269 Epoch [241/500], Loss: 0.0034664086997509003 Epoch [242/500], Loss: 0.04071921110153198 Epoch [243/500], Loss: 0.0006618301849812269 Epoch [244/500], Loss: 0.0021888422779738903 Epoch [245/500], Loss: 0.0013250377960503101 Epoch [246/500], Loss: 0.003629772923886776 Epoch [247/500], Loss: 0.0594235323369503 Epoch [248/500], Loss: 0.016643522307276726 Epoch [249/500], Loss: 0.009673218242824078 Epoch [250/500], Loss: 0.0036092321388423443 Epoch [251/500], Loss: 0.05879771336913109 Epoch [252/500], Loss: 0.003090853802859783 Epoch [253/500], Loss: 0.02111334726214409 Epoch [254/500], Loss: 0.022870125249028206 Epoch [255/500], Loss: 0.05318121984601021 Epoch [256/500], Loss: 0.0017253202386200428 Epoch [257/500], Loss: 0.007201156113296747 Epoch [258/500], Loss: 0.002112701768055558 Epoch [259/500], Loss: 0.0073450361378490925 Epoch [260/500], Loss: 0.015811219811439514 Epoch [261/500], Loss: 0.004212500061839819 Epoch [262/500], Loss: 0.001272354507818818 Epoch [263/500], Loss: 0.005261383485049009 Epoch [264/500], Loss: 0.0038850714918226004 Epoch [265/500], Loss: 0.05691572651267052 Epoch [266/500], Loss: 0.006289471406489611 Epoch [267/500], Loss: 0.002757476642727852 Epoch [268/500], Loss: 0.021156737580895424 Epoch [269/500], Loss: 0.0023931332398205996 Epoch [270/500], Loss: 0.006766161881387234 Epoch [271/500], Loss: 0.02514025941491127 Epoch [272/500], Loss: 0.00140485935844481 Epoch [273/500], Loss: 0.0009262938983738422 Epoch [274/500], Loss: 0.004550900310277939 Epoch [275/500], Loss: 0.00421573081985116 Epoch [276/500], Loss: 0.00157522177323699 Epoch [277/500], Loss: 0.0007012173300608993 Epoch [278/500], Loss: 0.026990199461579323 Epoch [279/500], Loss: 4.483868178795092e-05 Epoch [280/500], Loss: 0.004302495159208775 Epoch [281/500], Loss: 0.008067263290286064 Epoch [282/500], Loss: 0.011439524590969086 Epoch [283/500], Loss: 9.122218762058765e-05 Epoch [284/500], Loss: 0.0031750069465488195 Epoch [285/500], Loss: 0.029386484995484352 Epoch [286/500], Loss: 0.005599193274974823 Epoch [287/500], Loss: 0.005738256499171257 Epoch [288/500], Loss: 0.06986704468727112 Epoch [289/500], Loss: 0.0018949476070702076 Epoch [290/500], Loss: 0.09041392058134079 Epoch [291/500], Loss: 0.018560271710157394 Epoch [292/500], Loss: 0.0026267501525580883 Epoch [293/500], Loss: 0.0019922247156500816 Epoch [294/500], Loss: 0.017104022204875946 Epoch [295/500], Loss: 0.0005694482824765146 Epoch [296/500], Loss: 0.007507719565182924 Epoch [297/500], Loss: 0.006482328288257122 Epoch [298/500], Loss: 0.0024087103083729744 Epoch [299/500], Loss: 0.05851000174880028 Epoch [300/500], Loss: 0.0011563425650820136 Epoch [301/500], Loss: 0.011744212359189987 Epoch [302/500], Loss: 0.002184823388233781 Epoch [303/500], Loss: 0.0058927880600094795 Epoch [304/500], Loss: 0.00927648413926363 Epoch [305/500], Loss: 0.0479230061173439 Epoch [306/500], Loss: 0.004363314248621464 Epoch [307/500], Loss: 0.0086222467944026 Epoch [308/500], Loss: 0.030482163652777672 Epoch [309/500], Loss: 0.003002484329044819 Epoch [310/500], Loss: 0.0066330209374427795 Epoch [311/500], Loss: 0.018022719770669937 Epoch [312/500], Loss: 0.00010116666089743376 Epoch [313/500], Loss: 0.00018753194308374077 Epoch [314/500], Loss: 0.004829633980989456 Epoch [315/500], Loss: 0.00032581284176558256 Epoch [316/500], Loss: 0.002615016885101795 Epoch [317/500], Loss: 0.007050527725368738 Epoch [318/500], Loss: 0.005131404846906662 Epoch [319/500], Loss: 0.0011197510175406933 Epoch [320/500], Loss: 0.05249177664518356 Epoch [321/500], Loss: 0.03191399574279785 Epoch [322/500], Loss: 0.00019152279128320515 Epoch [323/500], Loss: 0.004909854382276535 Epoch [324/500], Loss: 0.011886325664818287 Epoch [325/500], Loss: 0.029640868306159973 Epoch [326/500], Loss: 0.031823888421058655 Epoch [327/500], Loss: 0.0062920390628278255 Epoch [328/500], Loss: 0.00017116339586209506 Epoch [329/500], Loss: 0.001436929334886372 Epoch [330/500], Loss: 0.01118563488125801 Epoch [331/500], Loss: 0.00213825237005949 Epoch [332/500], Loss: 0.03062581643462181 Epoch [333/500], Loss: 0.0002852695470210165 Epoch [334/500], Loss: 0.07077590376138687 Epoch [335/500], Loss: 0.003759284969419241 Epoch [336/500], Loss: 0.0001586130092618987 Epoch [337/500], Loss: 0.0017211722442880273 Epoch [338/500], Loss: 0.02109840139746666 Epoch [339/500], Loss: 3.771989213419147e-05 Epoch [340/500], Loss: 0.032939717173576355 Epoch [341/500], Loss: 0.00371516402810812 Epoch [342/500], Loss: 8.676133438711986e-05 Epoch [343/500], Loss: 0.08196665346622467 Epoch [344/500], Loss: 0.013854238204658031 Epoch [345/500], Loss: 0.0006285720155574381 Epoch [346/500], Loss: 0.02226456254720688 Epoch [347/500], Loss: 0.014572139829397202 Epoch [348/500], Loss: 0.0019131932640448213 Epoch [349/500], Loss: 0.00010305445175617933 Epoch [350/500], Loss: 0.0005039064562879503 Epoch [351/500], Loss: 0.004073971416801214 Epoch [352/500], Loss: 0.033171914517879486 Epoch [353/500], Loss: 0.010129127651453018 Epoch [354/500], Loss: 0.0022702424321323633 Epoch [355/500], Loss: 0.008167754858732224 Epoch [356/500], Loss: 0.00785281416028738 Epoch [357/500], Loss: 0.000322493928251788 Epoch [358/500], Loss: 0.004858750384300947 Epoch [359/500], Loss: 0.004881981760263443 Epoch [360/500], Loss: 0.00746745802462101 Epoch [361/500], Loss: 0.010379328392446041 Epoch [362/500], Loss: 0.0058652726002037525 Epoch [363/500], Loss: 0.0004618314269464463 Epoch [364/500], Loss: 0.006085830274969339 Epoch [365/500], Loss: 0.000941723701544106 Epoch [366/500], Loss: 0.03767510876059532 Epoch [367/500], Loss: 0.011685237288475037 Epoch [368/500], Loss: 0.0008707785164006054 Epoch [369/500], Loss: 0.02531655691564083 Epoch [370/500], Loss: 0.004996206145733595 Epoch [371/500], Loss: 0.0010867653181776404 Epoch [372/500], Loss: 0.0006999694742262363 Epoch [373/500], Loss: 0.01705274172127247 Epoch [374/500], Loss: 0.05384088680148125 Epoch [375/500], Loss: 0.01126344595104456 Epoch [376/500], Loss: 0.015743853524327278 Epoch [377/500], Loss: 0.0320991650223732 Epoch [378/500], Loss: 0.0005882499390281737 Epoch [379/500], Loss: 0.0021416516974568367 Epoch [380/500], Loss: 0.05442241206765175 Epoch [381/500], Loss: 0.0015610777772963047 Epoch [382/500], Loss: 0.0005470622563734651 Epoch [383/500], Loss: 0.018941041082143784 Epoch [384/500], Loss: 0.012094169855117798 Epoch [385/500], Loss: 0.019305747002363205 Epoch [386/500], Loss: 0.07708469778299332 Epoch [387/500], Loss: 0.002655626507475972 Epoch [388/500], Loss: 0.004292313475161791 Epoch [389/500], Loss: 0.07936340570449829 Epoch [390/500], Loss: 0.0016670332988724113 Epoch [391/500], Loss: 0.016174355521798134 Epoch [392/500], Loss: 0.006798918824642897 Epoch [393/500], Loss: 0.00029492948669940233 Epoch [394/500], Loss: 0.00011687564983731136 Epoch [395/500], Loss: 0.017136242240667343 Epoch [396/500], Loss: 0.03577888756990433 Epoch [397/500], Loss: 0.0003159995249006897 Epoch [398/500], Loss: 0.015117786824703217 Epoch [399/500], Loss: 0.0011421125382184982 Epoch [400/500], Loss: 0.015192978084087372 Epoch [401/500], Loss: 0.004745943006128073 Epoch [402/500], Loss: 0.01897788792848587 Epoch [403/500], Loss: 0.003992446698248386 Epoch [404/500], Loss: 0.002169169718399644 Epoch [405/500], Loss: 0.00431237043812871 Epoch [406/500], Loss: 0.009031831286847591 Epoch [407/500], Loss: 0.0004428187385201454 Epoch [408/500], Loss: 0.0013586758868768811 Epoch [409/500], Loss: 0.0005756563041359186 Epoch [410/500], Loss: 0.0019101585494354367 Epoch [411/500], Loss: 0.007107924669981003 Epoch [412/500], Loss: 0.030316229909658432 Epoch [413/500], Loss: 0.0037615930195897818 Epoch [414/500], Loss: 0.07749312371015549 Epoch [415/500], Loss: 0.0011442011455073953 Epoch [416/500], Loss: 0.02641211822628975 Epoch [417/500], Loss: 0.0019013454439118505 Epoch [418/500], Loss: 0.0012308870209380984 Epoch [419/500], Loss: 0.029525063931941986 Epoch [420/500], Loss: 0.0033975038677453995 Epoch [421/500], Loss: 0.019639603793621063 Epoch [422/500], Loss: 0.026813963428139687 Epoch [423/500], Loss: 0.09774226695299149 Epoch [424/500], Loss: 0.0067322226241230965 Epoch [425/500], Loss: 0.0039830454625189304 Epoch [426/500], Loss: 0.007469410542398691 Epoch [427/500], Loss: 0.006599951535463333 Epoch [428/500], Loss: 0.0003117715532425791 Epoch [429/500], Loss: 0.06321053951978683 Epoch [430/500], Loss: 0.02796204388141632 Epoch [431/500], Loss: 0.01125897467136383 Epoch [432/500], Loss: 0.012813759967684746 Epoch [433/500], Loss: 0.00016852852422744036 Epoch [434/500], Loss: 4.32200358773116e-05 Epoch [435/500], Loss: 4.2314281017752364e-05 Epoch [436/500], Loss: 0.0017623314633965492 Epoch [437/500], Loss: 0.00034134119050577283 Epoch [438/500], Loss: 0.003189063398167491 Epoch [439/500], Loss: 0.012208868749439716 Epoch [440/500], Loss: 0.00021606399968732148 Epoch [441/500], Loss: 0.0074579231441020966 Epoch [442/500], Loss: 0.056500423699617386 Epoch [443/500], Loss: 0.00363926007412374 Epoch [444/500], Loss: 0.005867509637027979 Epoch [445/500], Loss: 0.024584908038377762 Epoch [446/500], Loss: 0.003773406380787492 Epoch [447/500], Loss: 0.0007757568964734674 Epoch [448/500], Loss: 0.0009093397529795766 Epoch [449/500], Loss: 0.10213126242160797 Epoch [450/500], Loss: 0.0014893009793013334 Epoch [451/500], Loss: 0.037768542766571045 Epoch [452/500], Loss: 0.004203153774142265 Epoch [453/500], Loss: 0.06414327025413513 Epoch [454/500], Loss: 0.00043996336171403527 Epoch [455/500], Loss: 0.07357332110404968 Epoch [456/500], Loss: 0.00012314450577832758 Epoch [457/500], Loss: 0.001822543446905911 Epoch [458/500], Loss: 0.007053156848996878 Epoch [459/500], Loss: 0.0015076244017109275 Epoch [460/500], Loss: 0.06966470181941986 Epoch [461/500], Loss: 0.0076424842700362206 Epoch [462/500], Loss: 0.007046313025057316 Epoch [463/500], Loss: 0.0007923615048639476 Epoch [464/500], Loss: 0.0010421440238133073 Epoch [465/500], Loss: 0.0005530701600946486 Epoch [466/500], Loss: 0.019697077572345734 Epoch [467/500], Loss: 0.01717797853052616 Epoch [468/500], Loss: 0.027404241263866425 Epoch [469/500], Loss: 0.02524019032716751 Epoch [470/500], Loss: 0.0001539852819405496 Epoch [471/500], Loss: 1.759162114467472e-05 Epoch [472/500], Loss: 0.016116004437208176 Epoch [473/500], Loss: 0.03092670999467373 Epoch [474/500], Loss: 0.002649572677910328 Epoch [475/500], Loss: 0.0005785493995063007 Epoch [476/500], Loss: 0.0015102593461051583 Epoch [477/500], Loss: 0.024442831054329872 Epoch [478/500], Loss: 0.004341053776443005 Epoch [479/500], Loss: 0.0004253871738910675 Epoch [480/500], Loss: 0.005762408021837473 Epoch [481/500], Loss: 0.004989593755453825 Epoch [482/500], Loss: 0.025062531232833862 Epoch [483/500], Loss: 0.03994796425104141 Epoch [484/500], Loss: 0.0008264294592663646 Epoch [485/500], Loss: 0.0017936986405402422 Epoch [486/500], Loss: 0.0003618694026954472 Epoch [487/500], Loss: 9.855670214165002e-05 Epoch [488/500], Loss: 0.004056067205965519 Epoch [489/500], Loss: 0.0008443252881988883 Epoch [490/500], Loss: 0.004218078218400478 Epoch [491/500], Loss: 0.00034481531474739313 Epoch [492/500], Loss: 0.024722788482904434 Epoch [493/500], Loss: 0.023092253133654594 Epoch [494/500], Loss: 0.00404107291251421 Epoch [495/500], Loss: 0.025037666782736778 Epoch [496/500], Loss: 0.02405189536511898 Epoch [497/500], Loss: 0.06052062287926674 Epoch [498/500], Loss: 8.332116703968495e-05 Epoch [499/500], Loss: 0.0018309121951460838 Epoch [500/500], Loss: 0.0009281415259465575
In [90]:
# 在测试集上评估模型 model.eval() total_loss = 0.0 with torch.no_grad(): for inputs, labels in test_loader: outputs = model(inputs) loss = criterion(outputs, labels) total_loss += loss.item() mean_loss = total_loss / len(test_loader) print(f'Mean Squared Error on Test Set: {mean_loss}')
Mean Squared Error on Test Set: 0.044565981098761163
In [91]:
# 在测试集上评估模型 model.eval() test_pred = [] test_label = [] with torch.no_grad(): for inputs, labels in test_loader: outputs = model(inputs) test_pred += [i[0] for i in scaler.inverse_transform(outputs)] test_label += [i[0] for i in scaler.inverse_transform(labels)] get_acc(test_pred,test_label)
Mean Squared Error: 699595.7527474399 Root Mean Squared Error: 836.418407704804 Mean Absolute Error: 518.1976214232881 Coefficient of Determination (R²): 0.3576555083971398
孔体积(cm3/g)¶
In [28]:
from sklearn.model_selection import train_test_split X = data_2[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']] y = data_2['孔体积(cm3/g)'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
In [29]:
from sklearn.linear_model import LinearRegression clf = LinearRegression() clf.fit(X_train, y_train) test_pred = clf.predict(X_test)
In [30]:
get_acc(y_test,test_pred)
Mean Squared Error: 0.23406172859604119 Root Mean Squared Error: 0.4837992647741842 Mean Absolute Error: 0.2891327306690083 Coefficient of Determination (R²): 0.17152237332862152
In [31]:
coefficients = clf.coef_ key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'] for index,i in enumerate(coefficients): print(key[index] + ':' + str(i))
煤种:-0.5334104525105708 分析水Mad:1.049110275675087 灰分:-0.47087329582498033 挥发分:-0.09779320575303108 碳:0.8104530679119436 氢:0.6541821436278573 氮:-0.13529073464861613 硫:0.11133231819862932 氧:0.9365424234738452 碳化温度(℃):1.3646667632508422 升温速率(℃/min):-0.05147059728009279 保温时间(h):0.8081640623452375 KOH:0.8825591062288682 K2CO3:-0.46868652599186134
In [32]:
import lightgbm as lgb # 创建 LightGBM 数据集 train_data = lgb.Dataset(X_train, label=y_train) test_data = lgb.Dataset(X_test, label=y_test, reference=train_data) # 设置参数 params = { 'objective': 'regression', # 回归问题 'boosting_type': 'gbdt', # 使用梯度提升 'metric': 'rmse', # 使用均方根误差作为评估指标 'num_leaves': 2, # 每棵树的叶子节点数 'learning_rate': 0.01, # 学习率 } # 训练模型 #定义callback回调 callback=[lgb.early_stopping(stopping_rounds=10,verbose=True), lgb.log_evaluation(period=10,show_stdv=True)] # 训练 train m1 = lgb.train(params,train_data,num_boost_round=200, valid_sets=[train_data,test_data],callbacks=callback) # 进行预测 y_pred = m1.predict(X_test, num_iteration=m1.best_iteration) get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000325 seconds. You can set `force_col_wise=true` to remove the overhead. [LightGBM] [Info] Total Bins 112 [LightGBM] [Info] Number of data points in the train set: 58, number of used features: 11 [LightGBM] [Info] Start training from score 1.226647 Training until validation scores don't improve for 10 rounds [10] training's rmse: 0.638817 valid_1's rmse: 0.579195 [20] training's rmse: 0.62939 valid_1's rmse: 0.5711 [30] training's rmse: 0.621342 valid_1's rmse: 0.567154 [40] training's rmse: 0.614175 valid_1's rmse: 0.565478 [50] training's rmse: 0.607441 valid_1's rmse: 0.562633 [60] training's rmse: 0.601112 valid_1's rmse: 0.560704 [70] training's rmse: 0.595164 valid_1's rmse: 0.558691 [80] training's rmse: 0.589578 valid_1's rmse: 0.556304 [90] training's rmse: 0.584311 valid_1's rmse: 0.55425 [100] training's rmse: 0.579347 valid_1's rmse: 0.552013 [110] training's rmse: 0.574669 valid_1's rmse: 0.550285 [120] training's rmse: 0.570262 valid_1's rmse: 0.547892 [130] training's rmse: 0.566114 valid_1's rmse: 0.546906 [140] training's rmse: 0.562209 valid_1's rmse: 0.544828 [150] training's rmse: 0.558535 valid_1's rmse: 0.543634 [160] training's rmse: 0.555081 valid_1's rmse: 0.542251 [170] training's rmse: 0.551832 valid_1's rmse: 0.54127 [180] training's rmse: 0.54878 valid_1's rmse: 0.540094 [190] training's rmse: 0.545912 valid_1's rmse: 0.539304 [200] training's rmse: 0.543218 valid_1's rmse: 0.538589 Did not meet early stopping. Best iteration is: [200] training's rmse: 0.543218 valid_1's rmse: 0.538589 Mean Squared Error: 0.29007798664843815 Root Mean Squared Error: 0.5385888846313468 Mean Absolute Error: 0.4479232290957567 Coefficient of Determination (R²): -0.026751034308880817
In [33]:
plt.figure(figsize=(12,6)) lgb.plot_importance(m1, max_num_features=30) plt.title("Featurertances") plt.show()
<Figure size 1200x600 with 0 Axes>
微孔体积(cm3/g)¶
In [34]:
X = data_3[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']] y = data_3['微孔体积(cm3/g)'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
In [35]:
from sklearn.linear_model import LinearRegression clf = LinearRegression() clf.fit(X_train, y_train) test_pred = clf.predict(X_test)
In [36]:
get_acc(y_test,test_pred)
Mean Squared Error: 0.058644300055131 Root Mean Squared Error: 0.24216585237215218 Mean Absolute Error: 0.1869146665913017 Coefficient of Determination (R²): 0.6014531524700744
In [37]:
coefficients = clf.coef_ key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'] for index,i in enumerate(coefficients): print(key[index] + ':' + str(i))
煤种:0.038009384028790595 分析水Mad:0.5270239540332698 灰分:-0.2935950026944355 挥发分:-0.47505236069221957 碳:0.7925565775686018 氢:1.0226097972268275 氮:-0.2164263814475165 硫:-0.187314647621951 氧:0.4324238526226014 碳化温度(℃):0.5946628823386292 升温速率(℃/min):-0.033400473160046816 保温时间(h):0.27469886155258133 KOH:0.6542891650386291 K2CO3:-0.02072386352158367
In [38]:
import lightgbm as lgb # 创建 LightGBM 数据集 train_data = lgb.Dataset(X_train, label=y_train) test_data = lgb.Dataset(X_test, label=y_test, reference=train_data) # 设置参数 params = { 'objective': 'regression', # 回归问题 'boosting_type': 'gbdt', # 使用梯度提升 'metric': 'rmse', # 使用均方根误差作为评估指标 'num_leaves': 2, # 每棵树的叶子节点数 'learning_rate': 0.01, # 学习率 } # 训练模型 #定义callback回调 callback=[lgb.early_stopping(stopping_rounds=10,verbose=True), lgb.log_evaluation(period=10,show_stdv=True)] # 训练 train m1 = lgb.train(params,train_data,num_boost_round=200, valid_sets=[train_data,test_data],callbacks=callback) # 进行预测 y_pred = m1.predict(X_test, num_iteration=m1.best_iteration) get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds. You can set `force_col_wise=true` to remove the overhead. [LightGBM] [Info] Total Bins 120 [LightGBM] [Info] Number of data points in the train set: 57, number of used features: 11 [LightGBM] [Info] Start training from score 0.730716 Training until validation scores don't improve for 10 rounds [10] training's rmse: 0.396872 valid_1's rmse: 0.372902 [20] training's rmse: 0.389406 valid_1's rmse: 0.360807 [30] training's rmse: 0.383114 valid_1's rmse: 0.351237 [40] training's rmse: 0.377736 valid_1's rmse: 0.342674 [50] training's rmse: 0.373147 valid_1's rmse: 0.335013 [60] training's rmse: 0.369238 valid_1's rmse: 0.32816 [70] training's rmse: 0.365907 valid_1's rmse: 0.322454 [80] training's rmse: 0.363019 valid_1's rmse: 0.317971 [90] training's rmse: 0.360347 valid_1's rmse: 0.314376 [100] training's rmse: 0.357855 valid_1's rmse: 0.31094 [110] training's rmse: 0.355528 valid_1's rmse: 0.30764 [120] training's rmse: 0.353358 valid_1's rmse: 0.304501 [130] training's rmse: 0.351336 valid_1's rmse: 0.301502 [140] training's rmse: 0.349451 valid_1's rmse: 0.298626 [150] training's rmse: 0.347695 valid_1's rmse: 0.295886 [160] training's rmse: 0.34606 valid_1's rmse: 0.293263 [170] training's rmse: 0.344537 valid_1's rmse: 0.29076 [180] training's rmse: 0.34312 valid_1's rmse: 0.288368 [190] training's rmse: 0.341801 valid_1's rmse: 0.286156 [200] training's rmse: 0.340544 valid_1's rmse: 0.283861 Did not meet early stopping. Best iteration is: [200] training's rmse: 0.340544 valid_1's rmse: 0.283861 Mean Squared Error: 0.0805769145909526 Root Mean Squared Error: 0.2838607309772745 Mean Absolute Error: 0.2001926545731756 Coefficient of Determination (R²): 0.4523990351368772
In [39]:
plt.figure(figsize=(12,6)) lgb.plot_importance(m1, max_num_features=30) plt.title("Featurertances") plt.show()
<Figure size 1200x600 with 0 Axes>
介孔体积(cm3/g)¶
In [40]:
X = data_4[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']] y = data_4['介孔体积(cm3/g)'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
In [41]:
from sklearn.linear_model import LinearRegression clf = LinearRegression() clf.fit(X_train, y_train) test_pred = clf.predict(X_test)
In [42]:
get_acc(y_test,test_pred)
Mean Squared Error: 0.633360271830096 Root Mean Squared Error: 0.7958393505162308 Mean Absolute Error: 0.5042366422580364 Coefficient of Determination (R²): -1.5081213582093067
In [43]:
coefficients = clf.coef_ key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)', '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'] for index,i in enumerate(coefficients): print(key[index] + ':' + str(i))
煤种:-0.5607084300067899 分析水Mad:0.4984214658264311 灰分:-0.14117597792825254 挥发分:0.7366654205853167 碳:-8.993414025590598 氢:-3.3280548911300323 氮:-0.7426776489865452 硫:-0.30171686040116197 氧:-4.8875223095614375 碳化温度(℃):0.6126040745268642 升温速率(℃/min):-0.07211666342106407 保温时间(h):0.23227893599959817 KOH:0.32346263890721333 K2CO3:-0.30227894335703315
In [44]:
import lightgbm as lgb # 创建 LightGBM 数据集 train_data = lgb.Dataset(X_train, label=y_train) test_data = lgb.Dataset(X_test, label=y_test, reference=train_data) # 设置参数 params = { 'objective': 'regression', # 回归问题 'boosting_type': 'gbdt', # 使用梯度提升 'metric': 'rmse', # 使用均方根误差作为评估指标 'num_leaves': 2, # 每棵树的叶子节点数 'learning_rate': 0.01, # 学习率 } # 训练模型 #定义callback回调 callback=[lgb.early_stopping(stopping_rounds=10,verbose=True), lgb.log_evaluation(period=10,show_stdv=True)] # 训练 train m1 = lgb.train(params,train_data,num_boost_round=200, valid_sets=[train_data,test_data],callbacks=callback) # 进行预测 y_pred = m1.predict(X_test, num_iteration=m1.best_iteration) get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds. You can set `force_col_wise=true` to remove the overhead. [LightGBM] [Info] Total Bins 74 [LightGBM] [Info] Number of data points in the train set: 48, number of used features: 9 [LightGBM] [Info] Start training from score 0.524919 Training until validation scores don't improve for 10 rounds [10] training's rmse: 0.428728 valid_1's rmse: 0.487862 [20] training's rmse: 0.417581 valid_1's rmse: 0.477524 [30] training's rmse: 0.407814 valid_1's rmse: 0.470645 [40] training's rmse: 0.399242 valid_1's rmse: 0.464887 [50] training's rmse: 0.391738 valid_1's rmse: 0.460082 [60] training's rmse: 0.385183 valid_1's rmse: 0.456108 [70] training's rmse: 0.379467 valid_1's rmse: 0.452842 [80] training's rmse: 0.374493 valid_1's rmse: 0.450179 [90] training's rmse: 0.370172 valid_1's rmse: 0.448034 [100] training's rmse: 0.366423 valid_1's rmse: 0.446322 [110] training's rmse: 0.363176 valid_1's rmse: 0.444977 [120] training's rmse: 0.360366 valid_1's rmse: 0.443942 [130] training's rmse: 0.357934 valid_1's rmse: 0.443339 [140] training's rmse: 0.35582 valid_1's rmse: 0.443088 [150] training's rmse: 0.353985 valid_1's rmse: 0.44256 [160] training's rmse: 0.352391 valid_1's rmse: 0.442537 [170] training's rmse: 0.350995 valid_1's rmse: 0.442414 Early stopping, best iteration is: [167] training's rmse: 0.351395 valid_1's rmse: 0.442179 Mean Squared Error: 0.19552236767207556 Root Mean Squared Error: 0.44217911265919785 Mean Absolute Error: 0.30130148562563 Coefficient of Determination (R²): 0.22572689166468962
In [45]:
plt.figure(figsize=(12,6)) lgb.plot_importance(m1, max_num_features=30) plt.title("Featurertances") plt.show()
<Figure size 1200x600 with 0 Axes>