coal_materials/data/20240102/数据模型测试.ipynb

1.1 MiB
Raw Blame History

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
import matplotlib.pyplot as plt
plt.rcParams["font.sans-serif"]=["SimHei"] #设置字体
plt.rcParams["axes.unicode_minus"]=False #该语句解决图像中的“-”负号的乱码问题
In [3]:
data_path = "./data/煤质碳材料数据.xlsx"
In [4]:
data = pd.read_excel(data_path)
In [5]:
# 缺失值填充
## 根据煤种分类,将缺失值进行补全,采用均值填充
## 用不上因为分析数据X值缺失的时候Y也会缺失
## 萃取中级烟煤还只有一条数据,无法参考补全
## 直接删除X缺失数据

# 补全:
# grouped = data.groupby('煤种')
# # 定义填充函数
# def fill_with_mean(group, name):
#     group[name].fillna(group[name].mean(), inplace=True)
#     return group

# # 在每个分组内使用均值法填充空值
# data_filled = grouped.apply(fill_with_mean,"分析水Mad")

# 删除某一列
data_full = data.dropna(axis=0,subset = ['编号', '煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'])
del data_full['编号']
In [1]:
import matplotlib as mpl
print(mpl.get_cachedir())
/root/.cache/matplotlib
In [6]:
data_full = data_full.reset_index(drop=True)
In [ ]:
# one-hot 编码处理煤种数据
#from sklearn.preprocessing import OneHotEncoder
# encoder = OneHotEncoder()
# encoded_data = encoder.fit_transform(data_full[['煤种']])
# # 将稀疏矩阵转换为数组
# encoded_array = encoded_data.toarray()
# # 创建编码后的 DataFrame
# encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(['煤种']))
# data_full_one_hot = pd.concat([data_full, encoded_df], axis=1)
# del data_full_one_hot['煤种']
In [8]:
# 字典映射后准备归一化
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(data_full['煤种'])
data_full['煤种'] = encoded_labels
In [9]:
# 归一化,数据量太小了,且数据无正态分布一说感觉,直接归一化

x_col = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']

y_col = ['孔体积cm3/g)','微孔体积cm3/g)', '介孔体积cm3/g)','BET比表面积m2/g']
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# normalized_data = scaler.fit_transform(data_full[x_col])

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

normalized_data = scaler.fit_transform(data_full[x_col])
normalized_df = pd.DataFrame(normalized_data, columns=x_col)
data_full_minmax = pd.concat([normalized_df,data_full[y_col]],axis=1)
In [10]:
data_full_minmax
Out[10]:
煤种 分析水Mad 灰分 挥发分 碳化温度(℃) 升温速率(℃/min) 保温时间(h) KOH K2CO3 孔体积cm3/g) 微孔体积cm3/g) 介孔体积cm3/g) BET比表面积m2/g
0 0.090909 0.040520 0.176027 0.579416 0.897402 0.181024 0.333333 0.000000 0.141774 1.0 0.000000 0.6 0.0 0.0 0.270 NaN NaN 296.0
1 0.727273 0.436127 0.089271 0.755583 0.552794 0.131548 0.245763 1.000000 0.670623 0.1 0.285714 0.0 1.0 0.0 0.356 0.289 0.067 665.0
2 0.727273 0.436127 0.089271 0.755583 0.552794 0.131548 0.245763 1.000000 0.670623 0.1 0.285714 0.0 1.0 0.0 0.608 0.482 0.126 1221.0
3 0.727273 0.436127 0.089271 0.755583 0.552794 0.131548 0.245763 1.000000 0.670623 0.1 0.285714 0.0 1.0 0.0 1.438 0.670 0.768 2609.0
4 0.727273 0.436127 0.089271 0.755583 0.552794 0.131548 0.245763 1.000000 0.670623 0.1 0.285714 0.0 1.0 0.0 1.321 0.599 0.722 2323.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
64 0.272727 0.000000 0.085080 0.151960 0.998131 0.096042 0.378531 0.058989 0.000000 0.4 0.107143 0.2 1.0 0.0 1.608 1.204 0.404 3142.0
65 0.272727 0.000000 0.085080 0.151960 0.998131 0.096042 0.378531 0.058989 0.000000 0.4 0.107143 0.2 1.0 0.0 2.041 1.022 1.019 3389.0
66 0.272727 0.002165 0.174560 0.137279 1.000000 0.000000 0.457627 0.000000 0.048797 0.2 0.107143 0.2 1.0 0.0 1.135 0.916 0.219 2542.0
67 0.272727 0.002165 0.174560 0.137279 1.000000 0.000000 0.457627 0.000000 0.048797 0.4 0.107143 0.2 1.0 0.0 1.219 0.947 0.272 2665.0
68 0.272727 0.002165 0.174560 0.137279 1.000000 0.000000 0.457627 0.000000 0.048797 0.6 0.107143 0.2 1.0 0.0 1.473 0.718 0.755 2947.0

69 rows × 18 columns

四套数据集

In [12]:
data_1=data_full_minmax.drop(columns=['孔体积cm3/g)','微孔体积cm3/g)', '介孔体积cm3/g)'])
data_1 = data_1.dropna(axis=0, subset=['BET比表面积m2/g'])
In [13]:
train_corr = data_1.corr()
ax = plt.subplots(figsize=(20,16))
ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
No description has been provided for this image
In [14]:
data_2 = data_full_minmax.drop(columns=['BET比表面积m2/g','微孔体积cm3/g)', '介孔体积cm3/g)'])
data_2 = data_2.dropna(axis=0, subset=['孔体积cm3/g)'])
In [15]:
train_corr = data_2.corr()
ax = plt.subplots(figsize=(20,16))
ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
No description has been provided for this image
In [16]:
data_3 = data_full_minmax.drop(columns=['BET比表面积m2/g','孔体积cm3/g)', '介孔体积cm3/g)'])
data_3 = data_3.dropna(axis=0, subset=['微孔体积cm3/g)'])
In [17]:
train_corr = data_3.corr()
ax = plt.subplots(figsize=(20,16))
ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
No description has been provided for this image
In [18]:
data_4 = data_full_minmax.drop(columns=['BET比表面积m2/g','孔体积cm3/g)', '微孔体积cm3/g)'])
data_4 = data_4.dropna(axis=0, subset=['介孔体积cm3/g)'])
In [19]:
train_corr = data_4.corr()
ax = plt.subplots(figsize=(20,16))
ax = sns.heatmap(train_corr, vmax=.8, square=True, annot=True)
No description has been provided for this image

BET比表面积m2/g

In [20]:
from sklearn.model_selection import train_test_split
X = data_1[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']]
y = data_1['BET比表面积m2/g']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

线性回归

In [21]:
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(X_train, y_train)
test_pred = clf.predict(X_test)
In [22]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# 计算均方误差MSE
def get_acc(y_test, test_pred):
    mse = mean_squared_error(y_test,test_pred)
    print("Mean Squared Error:", mse)

    # 计算均方根误差RMSE
    rmse = np.sqrt(mse)
    print("Root Mean Squared Error:", rmse)

    # 计算平均绝对误差MAE
    mae = mean_absolute_error(y_test,test_pred)
    print("Mean Absolute Error:", mae)

    # 计算决定系数
    r2 = r2_score(y_test,test_pred)
    print("Coefficient of Determination (R²):", r2)
In [23]:
get_acc(y_test,test_pred)
Mean Squared Error: 356346.0854506917
Root Mean Squared Error: 596.9473054220881
Mean Absolute Error: 421.32564458570505
Coefficient of Determination (R²): 0.6570769358934058
In [24]:
coefficients = clf.coef_
key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']
for index,i in enumerate(coefficients):
    print(key[index] + ':' + str(i))
煤种:-877.2972339819474
分析水Mad:1114.112507797246
灰分:-1080.5492474856756
挥发分:-1014.5682008519212
碳:2502.1689304953256
氢:1547.1118343327066
氮:-310.53231471047394
硫:622.2484614557459
氧:1735.089974709626
碳化温度(℃):949.2902933052904
升温速率(℃/min):16.004040022910374
保温时间(h):999.0720137977573
KOH:1593.2510277610688
K2CO3:-284.0768698715867

LightGBM

In [25]:
import lightgbm as lgb
# 创建 LightGBM 数据集
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# 设置参数
params = {
    'objective': 'regression',  # 回归问题
    'boosting_type': 'gbdt',     # 使用梯度提升
    'metric': 'rmse',            # 使用均方根误差作为评估指标
    'num_leaves': 2,            # 每棵树的叶子节点数
    'learning_rate': 0.01,       # 学习率
}
# 训练模型
#定义callback回调
callback=[lgb.early_stopping(stopping_rounds=10,verbose=True),
          lgb.log_evaluation(period=10,show_stdv=True)]
# 训练 train
m1 = lgb.train(params,train_data,num_boost_round=200,
               valid_sets=[train_data,test_data],callbacks=callback)

# 进行预测
y_pred = m1.predict(X_test, num_iteration=m1.best_iteration)
get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000416 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 112
[LightGBM] [Info] Number of data points in the train set: 58, number of used features: 11
[LightGBM] [Info] Start training from score 2093.679985
Training until validation scores don't improve for 10 rounds
[10]	training's rmse: 963.582	valid_1's rmse: 1065.25
[20]	training's rmse: 951.788	valid_1's rmse: 1058.77
[30]	training's rmse: 941.053	valid_1's rmse: 1052.87
[40]	training's rmse: 931.293	valid_1's rmse: 1047.48
[50]	training's rmse: 922.404	valid_1's rmse: 1041.26
[60]	training's rmse: 914.075	valid_1's rmse: 1032.99
[70]	training's rmse: 906.208	valid_1's rmse: 1025.87
[80]	training's rmse: 898.771	valid_1's rmse: 1017.92
[90]	training's rmse: 891.757	valid_1's rmse: 1011.48
[100]	training's rmse: 885.133	valid_1's rmse: 1004.27
[110]	training's rmse: 878.885	valid_1's rmse: 997.768
[120]	training's rmse: 872.995	valid_1's rmse: 992.232
[130]	training's rmse: 867.436	valid_1's rmse: 987.243
[140]	training's rmse: 862.191	valid_1's rmse: 984.017
[150]	training's rmse: 857.24	valid_1's rmse: 979.536
[160]	training's rmse: 852.571	valid_1's rmse: 975.56
[170]	training's rmse: 848.168	valid_1's rmse: 971.554
[180]	training's rmse: 844	valid_1's rmse: 969.917
[190]	training's rmse: 840.022	valid_1's rmse: 966.922
[200]	training's rmse: 836.228	valid_1's rmse: 965.609
Did not meet early stopping. Best iteration is:
[200]	training's rmse: 836.228	valid_1's rmse: 965.609
Mean Squared Error: 932401.3299617766
Root Mean Squared Error: 965.609305030651
Mean Absolute Error: 803.9783213816435
Coefficient of Determination (R²): 0.1027208264595928
In [26]:
plt.figure(figsize=(12,6))
lgb.plot_importance(m1, max_num_features=30)
plt.title("Featurertances")
plt.show()
<Figure size 1200x600 with 0 Axes>
No description has been provided for this image

深度学习

In [27]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error
In [49]:
X = data_1[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']]
y = data_1['BET比表面积m2/g']
In [50]:
x_tensor = torch.Tensor(X.values)  # 将训练数据 x 转换为张量
y_tensor = torch.Tensor(y.values).reshape(-1,1)  # 将标签数据 y 转换为张量
In [51]:
#dataset = TensorDataset(x_tensor, y_tensor)
test_size = 0.15  # 设置测试集的比例
random_seed = 42  # 设置随机种子以确保可重复性
x_train, x_test, y_train, y_test = train_test_split(x_tensor, y_tensor, test_size=test_size, random_state=random_seed)
In [56]:
scaler = MinMaxScaler()
scaler.fit(y_train)
y_train = torch.Tensor(scaler.transform(y_train))
y_test = torch.Tensor(scaler.transform(y_test))
In [61]:
batch_size = 4  # 设置每个批次的大小
train_loader = DataLoader(TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(TensorDataset(x_test, y_test), batch_size=batch_size) # drop_last=True
In [38]:
# for i in range(2):
#     for inputs, labels in train_loader:
#         print(labels)
#         break
In [89]:
# 定义一个简单的前馈神经网络

class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_size, 10)
        self.fc3 = nn.Linear(10,output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.sigmoid(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        x = self.fc3(x)
        return x

# 定义训练参数
input_size = 14 
hidden_size = 40 
output_size = 1  

learning_rate = 0.01
num_epochs = 100

# 创建模型实例
model = Net(input_size, hidden_size, output_size)

# 定义损失函数和优化器
criterion = nn.MSELoss()  # 使用均方误差损失
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练回归模型
num_epochs = 500
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')
Epoch [1/500], Loss: 0.1375112235546112
Epoch [2/500], Loss: 0.05806560814380646
Epoch [3/500], Loss: 0.09772428125143051
Epoch [4/500], Loss: 0.021364646032452583
Epoch [5/500], Loss: 0.01773611083626747
Epoch [6/500], Loss: 0.021522678434848785
Epoch [7/500], Loss: 0.008145670406520367
Epoch [8/500], Loss: 0.06583523005247116
Epoch [9/500], Loss: 0.01977309212088585
Epoch [10/500], Loss: 0.023928888142108917
Epoch [11/500], Loss: 0.027664322406053543
Epoch [12/500], Loss: 0.0042370702140033245
Epoch [13/500], Loss: 0.02011457458138466
Epoch [14/500], Loss: 0.0015053569804877043
Epoch [15/500], Loss: 0.03043498657643795
Epoch [16/500], Loss: 0.018061762675642967
Epoch [17/500], Loss: 0.03032355196774006
Epoch [18/500], Loss: 0.003073741216212511
Epoch [19/500], Loss: 0.030238714069128036
Epoch [20/500], Loss: 0.013475775718688965
Epoch [21/500], Loss: 0.0019919639453291893
Epoch [22/500], Loss: 0.0721120536327362
Epoch [23/500], Loss: 0.018716545775532722
Epoch [24/500], Loss: 0.03277812898159027
Epoch [25/500], Loss: 0.0069098640233278275
Epoch [26/500], Loss: 0.02530064806342125
Epoch [27/500], Loss: 0.031360749155282974
Epoch [28/500], Loss: 0.007204529829323292
Epoch [29/500], Loss: 0.008599984459578991
Epoch [30/500], Loss: 0.003325084922835231
Epoch [31/500], Loss: 0.022524351254105568
Epoch [32/500], Loss: 0.014607256278395653
Epoch [33/500], Loss: 0.018749907612800598
Epoch [34/500], Loss: 0.0009139125468209386
Epoch [35/500], Loss: 0.012738749384880066
Epoch [36/500], Loss: 0.033581215888261795
Epoch [37/500], Loss: 0.016028260812163353
Epoch [38/500], Loss: 0.020110584795475006
Epoch [39/500], Loss: 0.02486024796962738
Epoch [40/500], Loss: 0.002677252981811762
Epoch [41/500], Loss: 0.012438181787729263
Epoch [42/500], Loss: 0.01628699153661728
Epoch [43/500], Loss: 0.028455808758735657
Epoch [44/500], Loss: 0.001482177060097456
Epoch [45/500], Loss: 0.006345514673739672
Epoch [46/500], Loss: 0.03298439830541611
Epoch [47/500], Loss: 0.05287860333919525
Epoch [48/500], Loss: 0.0027091410011053085
Epoch [49/500], Loss: 0.0118508730083704
Epoch [50/500], Loss: 0.0038459282368421555
Epoch [51/500], Loss: 0.024839868769049644
Epoch [52/500], Loss: 0.0030711244326084852
Epoch [53/500], Loss: 0.041477471590042114
Epoch [54/500], Loss: 0.04405653104186058
Epoch [55/500], Loss: 0.0023413999006152153
Epoch [56/500], Loss: 0.005603765603154898
Epoch [57/500], Loss: 0.05010407418012619
Epoch [58/500], Loss: 0.002134432550519705
Epoch [59/500], Loss: 0.0016706496244296432
Epoch [60/500], Loss: 0.0031203352846205235
Epoch [61/500], Loss: 0.022609718143939972
Epoch [62/500], Loss: 0.007998031564056873
Epoch [63/500], Loss: 0.03089986741542816
Epoch [64/500], Loss: 0.02436087280511856
Epoch [65/500], Loss: 0.0439436249434948
Epoch [66/500], Loss: 0.009039153344929218
Epoch [67/500], Loss: 0.03196775168180466
Epoch [68/500], Loss: 0.0014594022650271654
Epoch [69/500], Loss: 0.003043722826987505
Epoch [70/500], Loss: 0.02321109175682068
Epoch [71/500], Loss: 0.001104667317122221
Epoch [72/500], Loss: 0.0014685456408187747
Epoch [73/500], Loss: 0.010093788616359234
Epoch [74/500], Loss: 0.025984715670347214
Epoch [75/500], Loss: 0.055412407964468
Epoch [76/500], Loss: 0.027970263734459877
Epoch [77/500], Loss: 0.00738972332328558
Epoch [78/500], Loss: 0.0005837245844304562
Epoch [79/500], Loss: 0.002833475824445486
Epoch [80/500], Loss: 0.039277084171772
Epoch [81/500], Loss: 0.006304681301116943
Epoch [82/500], Loss: 0.027889391407370567
Epoch [83/500], Loss: 0.04046902805566788
Epoch [84/500], Loss: 0.0026628756895661354
Epoch [85/500], Loss: 0.00805905181914568
Epoch [86/500], Loss: 0.039849903434515
Epoch [87/500], Loss: 0.0007797772996127605
Epoch [88/500], Loss: 0.0935254693031311
Epoch [89/500], Loss: 0.02649867907166481
Epoch [90/500], Loss: 0.004896295256912708
Epoch [91/500], Loss: 0.0026272933464497328
Epoch [92/500], Loss: 0.04471985623240471
Epoch [93/500], Loss: 0.005443623289465904
Epoch [94/500], Loss: 0.006703123450279236
Epoch [95/500], Loss: 0.03889564424753189
Epoch [96/500], Loss: 0.002041529631242156
Epoch [97/500], Loss: 0.022731401026248932
Epoch [98/500], Loss: 0.02479637786746025
Epoch [99/500], Loss: 0.002792914630845189
Epoch [100/500], Loss: 0.01177084632217884
Epoch [101/500], Loss: 0.0010617190273478627
Epoch [102/500], Loss: 0.006667059846222401
Epoch [103/500], Loss: 0.06580355018377304
Epoch [104/500], Loss: 0.11343423277139664
Epoch [105/500], Loss: 0.005578789860010147
Epoch [106/500], Loss: 0.0020729107782244682
Epoch [107/500], Loss: 0.0024541851598769426
Epoch [108/500], Loss: 0.04115631431341171
Epoch [109/500], Loss: 0.0013442374765872955
Epoch [110/500], Loss: 0.0062351468950510025
Epoch [111/500], Loss: 0.0005676917498931289
Epoch [112/500], Loss: 0.02799421362578869
Epoch [113/500], Loss: 0.0011389830615371466
Epoch [114/500], Loss: 0.0016039859037846327
Epoch [115/500], Loss: 0.0019230787875130773
Epoch [116/500], Loss: 0.0062424009665846825
Epoch [117/500], Loss: 0.01075577363371849
Epoch [118/500], Loss: 0.17046409845352173
Epoch [119/500], Loss: 0.008756555616855621
Epoch [120/500], Loss: 0.02538108266890049
Epoch [121/500], Loss: 0.0027733466122299433
Epoch [122/500], Loss: 0.002166420454159379
Epoch [123/500], Loss: 0.0005181884625926614
Epoch [124/500], Loss: 0.002128859516233206
Epoch [125/500], Loss: 0.048927824944257736
Epoch [126/500], Loss: 0.006466357968747616
Epoch [127/500], Loss: 0.0013223910937085748
Epoch [128/500], Loss: 0.016330869868397713
Epoch [129/500], Loss: 0.0340036004781723
Epoch [130/500], Loss: 0.005737701430916786
Epoch [131/500], Loss: 0.043695010244846344
Epoch [132/500], Loss: 0.02377896197140217
Epoch [133/500], Loss: 0.03224272280931473
Epoch [134/500], Loss: 0.0022535149473696947
Epoch [135/500], Loss: 0.005497700069099665
Epoch [136/500], Loss: 0.0016764058964326978
Epoch [137/500], Loss: 0.013641731813549995
Epoch [138/500], Loss: 0.03554676100611687
Epoch [139/500], Loss: 0.002655998570844531
Epoch [140/500], Loss: 0.014379335567355156
Epoch [141/500], Loss: 0.0004798714362550527
Epoch [142/500], Loss: 0.022848913446068764
Epoch [143/500], Loss: 0.003569053253158927
Epoch [144/500], Loss: 0.0010363985784351826
Epoch [145/500], Loss: 0.0011998001718893647
Epoch [146/500], Loss: 0.029332881793379784
Epoch [147/500], Loss: 0.0021700109355151653
Epoch [148/500], Loss: 0.04989006742835045
Epoch [149/500], Loss: 0.017596838995814323
Epoch [150/500], Loss: 0.005552900023758411
Epoch [151/500], Loss: 0.02130822092294693
Epoch [152/500], Loss: 0.013184739276766777
Epoch [153/500], Loss: 0.003693445585668087
Epoch [154/500], Loss: 0.0002814586041495204
Epoch [155/500], Loss: 0.011256406083703041
Epoch [156/500], Loss: 0.0007573600159958005
Epoch [157/500], Loss: 0.0019472946878522635
Epoch [158/500], Loss: 0.015084566548466682
Epoch [159/500], Loss: 0.006373480428010225
Epoch [160/500], Loss: 0.0014554281951859593
Epoch [161/500], Loss: 0.003039491828531027
Epoch [162/500], Loss: 0.0013355282135307789
Epoch [163/500], Loss: 0.0038741338066756725
Epoch [164/500], Loss: 0.0035878620110452175
Epoch [165/500], Loss: 0.030438825488090515
Epoch [166/500], Loss: 0.0697525143623352
Epoch [167/500], Loss: 0.026584984734654427
Epoch [168/500], Loss: 0.0019321260042488575
Epoch [169/500], Loss: 0.05357321351766586
Epoch [170/500], Loss: 0.016700396314263344
Epoch [171/500], Loss: 0.09351684153079987
Epoch [172/500], Loss: 0.008441533893346786
Epoch [173/500], Loss: 0.016276434063911438
Epoch [174/500], Loss: 0.07126587629318237
Epoch [175/500], Loss: 0.005204909015446901
Epoch [176/500], Loss: 0.003409262513741851
Epoch [177/500], Loss: 0.04996544122695923
Epoch [178/500], Loss: 0.04525388404726982
Epoch [179/500], Loss: 0.009854236617684364
Epoch [180/500], Loss: 0.0035332152619957924
Epoch [181/500], Loss: 0.0025211272295564413
Epoch [182/500], Loss: 0.004975481424480677
Epoch [183/500], Loss: 0.0014112028293311596
Epoch [184/500], Loss: 0.002589487237855792
Epoch [185/500], Loss: 0.0003969790996052325
Epoch [186/500], Loss: 0.07826998084783554
Epoch [187/500], Loss: 0.0003743321285583079
Epoch [188/500], Loss: 0.028815554454922676
Epoch [189/500], Loss: 0.006910406053066254
Epoch [190/500], Loss: 0.0004680223355535418
Epoch [191/500], Loss: 0.0004503509262576699
Epoch [192/500], Loss: 0.0013653915375471115
Epoch [193/500], Loss: 0.06225353106856346
Epoch [194/500], Loss: 0.01694313995540142
Epoch [195/500], Loss: 0.024628346785902977
Epoch [196/500], Loss: 0.008102591149508953
Epoch [197/500], Loss: 0.042955197393894196
Epoch [198/500], Loss: 0.04331319406628609
Epoch [199/500], Loss: 0.04208041727542877
Epoch [200/500], Loss: 0.0032112603075802326
Epoch [201/500], Loss: 0.0031181415542960167
Epoch [202/500], Loss: 0.006705279462039471
Epoch [203/500], Loss: 0.006221733056008816
Epoch [204/500], Loss: 0.06713331490755081
Epoch [205/500], Loss: 0.0023540379479527473
Epoch [206/500], Loss: 0.012740552425384521
Epoch [207/500], Loss: 0.03839072957634926
Epoch [208/500], Loss: 0.0030456939712166786
Epoch [209/500], Loss: 0.012639260850846767
Epoch [210/500], Loss: 0.08555809408426285
Epoch [211/500], Loss: 0.011615005321800709
Epoch [212/500], Loss: 0.004777462687343359
Epoch [213/500], Loss: 0.012780732475221157
Epoch [214/500], Loss: 0.009147602133452892
Epoch [215/500], Loss: 0.0058478908613324165
Epoch [216/500], Loss: 0.04147972911596298
Epoch [217/500], Loss: 0.005171542055904865
Epoch [218/500], Loss: 7.350118539761752e-05
Epoch [219/500], Loss: 0.014487506821751595
Epoch [220/500], Loss: 0.014558564871549606
Epoch [221/500], Loss: 0.004064792767167091
Epoch [222/500], Loss: 0.013005146756768227
Epoch [223/500], Loss: 0.0153804374858737
Epoch [224/500], Loss: 0.0026739821769297123
Epoch [225/500], Loss: 0.021649105474352837
Epoch [226/500], Loss: 4.6764260332565755e-05
Epoch [227/500], Loss: 0.008229969069361687
Epoch [228/500], Loss: 0.03477006033062935
Epoch [229/500], Loss: 0.045727379620075226
Epoch [230/500], Loss: 0.005768945906311274
Epoch [231/500], Loss: 0.03247058391571045
Epoch [232/500], Loss: 0.012503192760050297
Epoch [233/500], Loss: 0.002126991981640458
Epoch [234/500], Loss: 0.010053770616650581
Epoch [235/500], Loss: 0.00034815288381651044
Epoch [236/500], Loss: 0.048214126378297806
Epoch [237/500], Loss: 0.00026693425024859607
Epoch [238/500], Loss: 2.02381270355545e-05
Epoch [239/500], Loss: 0.005556574556976557
Epoch [240/500], Loss: 0.03920856863260269
Epoch [241/500], Loss: 0.0034664086997509003
Epoch [242/500], Loss: 0.04071921110153198
Epoch [243/500], Loss: 0.0006618301849812269
Epoch [244/500], Loss: 0.0021888422779738903
Epoch [245/500], Loss: 0.0013250377960503101
Epoch [246/500], Loss: 0.003629772923886776
Epoch [247/500], Loss: 0.0594235323369503
Epoch [248/500], Loss: 0.016643522307276726
Epoch [249/500], Loss: 0.009673218242824078
Epoch [250/500], Loss: 0.0036092321388423443
Epoch [251/500], Loss: 0.05879771336913109
Epoch [252/500], Loss: 0.003090853802859783
Epoch [253/500], Loss: 0.02111334726214409
Epoch [254/500], Loss: 0.022870125249028206
Epoch [255/500], Loss: 0.05318121984601021
Epoch [256/500], Loss: 0.0017253202386200428
Epoch [257/500], Loss: 0.007201156113296747
Epoch [258/500], Loss: 0.002112701768055558
Epoch [259/500], Loss: 0.0073450361378490925
Epoch [260/500], Loss: 0.015811219811439514
Epoch [261/500], Loss: 0.004212500061839819
Epoch [262/500], Loss: 0.001272354507818818
Epoch [263/500], Loss: 0.005261383485049009
Epoch [264/500], Loss: 0.0038850714918226004
Epoch [265/500], Loss: 0.05691572651267052
Epoch [266/500], Loss: 0.006289471406489611
Epoch [267/500], Loss: 0.002757476642727852
Epoch [268/500], Loss: 0.021156737580895424
Epoch [269/500], Loss: 0.0023931332398205996
Epoch [270/500], Loss: 0.006766161881387234
Epoch [271/500], Loss: 0.02514025941491127
Epoch [272/500], Loss: 0.00140485935844481
Epoch [273/500], Loss: 0.0009262938983738422
Epoch [274/500], Loss: 0.004550900310277939
Epoch [275/500], Loss: 0.00421573081985116
Epoch [276/500], Loss: 0.00157522177323699
Epoch [277/500], Loss: 0.0007012173300608993
Epoch [278/500], Loss: 0.026990199461579323
Epoch [279/500], Loss: 4.483868178795092e-05
Epoch [280/500], Loss: 0.004302495159208775
Epoch [281/500], Loss: 0.008067263290286064
Epoch [282/500], Loss: 0.011439524590969086
Epoch [283/500], Loss: 9.122218762058765e-05
Epoch [284/500], Loss: 0.0031750069465488195
Epoch [285/500], Loss: 0.029386484995484352
Epoch [286/500], Loss: 0.005599193274974823
Epoch [287/500], Loss: 0.005738256499171257
Epoch [288/500], Loss: 0.06986704468727112
Epoch [289/500], Loss: 0.0018949476070702076
Epoch [290/500], Loss: 0.09041392058134079
Epoch [291/500], Loss: 0.018560271710157394
Epoch [292/500], Loss: 0.0026267501525580883
Epoch [293/500], Loss: 0.0019922247156500816
Epoch [294/500], Loss: 0.017104022204875946
Epoch [295/500], Loss: 0.0005694482824765146
Epoch [296/500], Loss: 0.007507719565182924
Epoch [297/500], Loss: 0.006482328288257122
Epoch [298/500], Loss: 0.0024087103083729744
Epoch [299/500], Loss: 0.05851000174880028
Epoch [300/500], Loss: 0.0011563425650820136
Epoch [301/500], Loss: 0.011744212359189987
Epoch [302/500], Loss: 0.002184823388233781
Epoch [303/500], Loss: 0.0058927880600094795
Epoch [304/500], Loss: 0.00927648413926363
Epoch [305/500], Loss: 0.0479230061173439
Epoch [306/500], Loss: 0.004363314248621464
Epoch [307/500], Loss: 0.0086222467944026
Epoch [308/500], Loss: 0.030482163652777672
Epoch [309/500], Loss: 0.003002484329044819
Epoch [310/500], Loss: 0.0066330209374427795
Epoch [311/500], Loss: 0.018022719770669937
Epoch [312/500], Loss: 0.00010116666089743376
Epoch [313/500], Loss: 0.00018753194308374077
Epoch [314/500], Loss: 0.004829633980989456
Epoch [315/500], Loss: 0.00032581284176558256
Epoch [316/500], Loss: 0.002615016885101795
Epoch [317/500], Loss: 0.007050527725368738
Epoch [318/500], Loss: 0.005131404846906662
Epoch [319/500], Loss: 0.0011197510175406933
Epoch [320/500], Loss: 0.05249177664518356
Epoch [321/500], Loss: 0.03191399574279785
Epoch [322/500], Loss: 0.00019152279128320515
Epoch [323/500], Loss: 0.004909854382276535
Epoch [324/500], Loss: 0.011886325664818287
Epoch [325/500], Loss: 0.029640868306159973
Epoch [326/500], Loss: 0.031823888421058655
Epoch [327/500], Loss: 0.0062920390628278255
Epoch [328/500], Loss: 0.00017116339586209506
Epoch [329/500], Loss: 0.001436929334886372
Epoch [330/500], Loss: 0.01118563488125801
Epoch [331/500], Loss: 0.00213825237005949
Epoch [332/500], Loss: 0.03062581643462181
Epoch [333/500], Loss: 0.0002852695470210165
Epoch [334/500], Loss: 0.07077590376138687
Epoch [335/500], Loss: 0.003759284969419241
Epoch [336/500], Loss: 0.0001586130092618987
Epoch [337/500], Loss: 0.0017211722442880273
Epoch [338/500], Loss: 0.02109840139746666
Epoch [339/500], Loss: 3.771989213419147e-05
Epoch [340/500], Loss: 0.032939717173576355
Epoch [341/500], Loss: 0.00371516402810812
Epoch [342/500], Loss: 8.676133438711986e-05
Epoch [343/500], Loss: 0.08196665346622467
Epoch [344/500], Loss: 0.013854238204658031
Epoch [345/500], Loss: 0.0006285720155574381
Epoch [346/500], Loss: 0.02226456254720688
Epoch [347/500], Loss: 0.014572139829397202
Epoch [348/500], Loss: 0.0019131932640448213
Epoch [349/500], Loss: 0.00010305445175617933
Epoch [350/500], Loss: 0.0005039064562879503
Epoch [351/500], Loss: 0.004073971416801214
Epoch [352/500], Loss: 0.033171914517879486
Epoch [353/500], Loss: 0.010129127651453018
Epoch [354/500], Loss: 0.0022702424321323633
Epoch [355/500], Loss: 0.008167754858732224
Epoch [356/500], Loss: 0.00785281416028738
Epoch [357/500], Loss: 0.000322493928251788
Epoch [358/500], Loss: 0.004858750384300947
Epoch [359/500], Loss: 0.004881981760263443
Epoch [360/500], Loss: 0.00746745802462101
Epoch [361/500], Loss: 0.010379328392446041
Epoch [362/500], Loss: 0.0058652726002037525
Epoch [363/500], Loss: 0.0004618314269464463
Epoch [364/500], Loss: 0.006085830274969339
Epoch [365/500], Loss: 0.000941723701544106
Epoch [366/500], Loss: 0.03767510876059532
Epoch [367/500], Loss: 0.011685237288475037
Epoch [368/500], Loss: 0.0008707785164006054
Epoch [369/500], Loss: 0.02531655691564083
Epoch [370/500], Loss: 0.004996206145733595
Epoch [371/500], Loss: 0.0010867653181776404
Epoch [372/500], Loss: 0.0006999694742262363
Epoch [373/500], Loss: 0.01705274172127247
Epoch [374/500], Loss: 0.05384088680148125
Epoch [375/500], Loss: 0.01126344595104456
Epoch [376/500], Loss: 0.015743853524327278
Epoch [377/500], Loss: 0.0320991650223732
Epoch [378/500], Loss: 0.0005882499390281737
Epoch [379/500], Loss: 0.0021416516974568367
Epoch [380/500], Loss: 0.05442241206765175
Epoch [381/500], Loss: 0.0015610777772963047
Epoch [382/500], Loss: 0.0005470622563734651
Epoch [383/500], Loss: 0.018941041082143784
Epoch [384/500], Loss: 0.012094169855117798
Epoch [385/500], Loss: 0.019305747002363205
Epoch [386/500], Loss: 0.07708469778299332
Epoch [387/500], Loss: 0.002655626507475972
Epoch [388/500], Loss: 0.004292313475161791
Epoch [389/500], Loss: 0.07936340570449829
Epoch [390/500], Loss: 0.0016670332988724113
Epoch [391/500], Loss: 0.016174355521798134
Epoch [392/500], Loss: 0.006798918824642897
Epoch [393/500], Loss: 0.00029492948669940233
Epoch [394/500], Loss: 0.00011687564983731136
Epoch [395/500], Loss: 0.017136242240667343
Epoch [396/500], Loss: 0.03577888756990433
Epoch [397/500], Loss: 0.0003159995249006897
Epoch [398/500], Loss: 0.015117786824703217
Epoch [399/500], Loss: 0.0011421125382184982
Epoch [400/500], Loss: 0.015192978084087372
Epoch [401/500], Loss: 0.004745943006128073
Epoch [402/500], Loss: 0.01897788792848587
Epoch [403/500], Loss: 0.003992446698248386
Epoch [404/500], Loss: 0.002169169718399644
Epoch [405/500], Loss: 0.00431237043812871
Epoch [406/500], Loss: 0.009031831286847591
Epoch [407/500], Loss: 0.0004428187385201454
Epoch [408/500], Loss: 0.0013586758868768811
Epoch [409/500], Loss: 0.0005756563041359186
Epoch [410/500], Loss: 0.0019101585494354367
Epoch [411/500], Loss: 0.007107924669981003
Epoch [412/500], Loss: 0.030316229909658432
Epoch [413/500], Loss: 0.0037615930195897818
Epoch [414/500], Loss: 0.07749312371015549
Epoch [415/500], Loss: 0.0011442011455073953
Epoch [416/500], Loss: 0.02641211822628975
Epoch [417/500], Loss: 0.0019013454439118505
Epoch [418/500], Loss: 0.0012308870209380984
Epoch [419/500], Loss: 0.029525063931941986
Epoch [420/500], Loss: 0.0033975038677453995
Epoch [421/500], Loss: 0.019639603793621063
Epoch [422/500], Loss: 0.026813963428139687
Epoch [423/500], Loss: 0.09774226695299149
Epoch [424/500], Loss: 0.0067322226241230965
Epoch [425/500], Loss: 0.0039830454625189304
Epoch [426/500], Loss: 0.007469410542398691
Epoch [427/500], Loss: 0.006599951535463333
Epoch [428/500], Loss: 0.0003117715532425791
Epoch [429/500], Loss: 0.06321053951978683
Epoch [430/500], Loss: 0.02796204388141632
Epoch [431/500], Loss: 0.01125897467136383
Epoch [432/500], Loss: 0.012813759967684746
Epoch [433/500], Loss: 0.00016852852422744036
Epoch [434/500], Loss: 4.32200358773116e-05
Epoch [435/500], Loss: 4.2314281017752364e-05
Epoch [436/500], Loss: 0.0017623314633965492
Epoch [437/500], Loss: 0.00034134119050577283
Epoch [438/500], Loss: 0.003189063398167491
Epoch [439/500], Loss: 0.012208868749439716
Epoch [440/500], Loss: 0.00021606399968732148
Epoch [441/500], Loss: 0.0074579231441020966
Epoch [442/500], Loss: 0.056500423699617386
Epoch [443/500], Loss: 0.00363926007412374
Epoch [444/500], Loss: 0.005867509637027979
Epoch [445/500], Loss: 0.024584908038377762
Epoch [446/500], Loss: 0.003773406380787492
Epoch [447/500], Loss: 0.0007757568964734674
Epoch [448/500], Loss: 0.0009093397529795766
Epoch [449/500], Loss: 0.10213126242160797
Epoch [450/500], Loss: 0.0014893009793013334
Epoch [451/500], Loss: 0.037768542766571045
Epoch [452/500], Loss: 0.004203153774142265
Epoch [453/500], Loss: 0.06414327025413513
Epoch [454/500], Loss: 0.00043996336171403527
Epoch [455/500], Loss: 0.07357332110404968
Epoch [456/500], Loss: 0.00012314450577832758
Epoch [457/500], Loss: 0.001822543446905911
Epoch [458/500], Loss: 0.007053156848996878
Epoch [459/500], Loss: 0.0015076244017109275
Epoch [460/500], Loss: 0.06966470181941986
Epoch [461/500], Loss: 0.0076424842700362206
Epoch [462/500], Loss: 0.007046313025057316
Epoch [463/500], Loss: 0.0007923615048639476
Epoch [464/500], Loss: 0.0010421440238133073
Epoch [465/500], Loss: 0.0005530701600946486
Epoch [466/500], Loss: 0.019697077572345734
Epoch [467/500], Loss: 0.01717797853052616
Epoch [468/500], Loss: 0.027404241263866425
Epoch [469/500], Loss: 0.02524019032716751
Epoch [470/500], Loss: 0.0001539852819405496
Epoch [471/500], Loss: 1.759162114467472e-05
Epoch [472/500], Loss: 0.016116004437208176
Epoch [473/500], Loss: 0.03092670999467373
Epoch [474/500], Loss: 0.002649572677910328
Epoch [475/500], Loss: 0.0005785493995063007
Epoch [476/500], Loss: 0.0015102593461051583
Epoch [477/500], Loss: 0.024442831054329872
Epoch [478/500], Loss: 0.004341053776443005
Epoch [479/500], Loss: 0.0004253871738910675
Epoch [480/500], Loss: 0.005762408021837473
Epoch [481/500], Loss: 0.004989593755453825
Epoch [482/500], Loss: 0.025062531232833862
Epoch [483/500], Loss: 0.03994796425104141
Epoch [484/500], Loss: 0.0008264294592663646
Epoch [485/500], Loss: 0.0017936986405402422
Epoch [486/500], Loss: 0.0003618694026954472
Epoch [487/500], Loss: 9.855670214165002e-05
Epoch [488/500], Loss: 0.004056067205965519
Epoch [489/500], Loss: 0.0008443252881988883
Epoch [490/500], Loss: 0.004218078218400478
Epoch [491/500], Loss: 0.00034481531474739313
Epoch [492/500], Loss: 0.024722788482904434
Epoch [493/500], Loss: 0.023092253133654594
Epoch [494/500], Loss: 0.00404107291251421
Epoch [495/500], Loss: 0.025037666782736778
Epoch [496/500], Loss: 0.02405189536511898
Epoch [497/500], Loss: 0.06052062287926674
Epoch [498/500], Loss: 8.332116703968495e-05
Epoch [499/500], Loss: 0.0018309121951460838
Epoch [500/500], Loss: 0.0009281415259465575
In [90]:
# 在测试集上评估模型
model.eval()
total_loss = 0.0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
    mean_loss = total_loss / len(test_loader)
    print(f'Mean Squared Error on Test Set: {mean_loss}')
Mean Squared Error on Test Set: 0.044565981098761163
In [91]:
# 在测试集上评估模型
model.eval()
test_pred = []
test_label = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        test_pred += [i[0] for i in scaler.inverse_transform(outputs)]
        test_label += [i[0] for i in scaler.inverse_transform(labels)]
    get_acc(test_pred,test_label)
Mean Squared Error: 699595.7527474399
Root Mean Squared Error: 836.418407704804
Mean Absolute Error: 518.1976214232881
Coefficient of Determination (R²): 0.3576555083971398

孔体积cm3/g)

In [28]:
from sklearn.model_selection import train_test_split

X = data_2[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']]
y = data_2['孔体积cm3/g)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
In [29]:
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(X_train, y_train)
test_pred = clf.predict(X_test)
In [30]:
get_acc(y_test,test_pred)
Mean Squared Error: 0.23406172859604119
Root Mean Squared Error: 0.4837992647741842
Mean Absolute Error: 0.2891327306690083
Coefficient of Determination (R²): 0.17152237332862152
In [31]:
coefficients = clf.coef_
key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']
for index,i in enumerate(coefficients):
    print(key[index] + ':' + str(i))
煤种:-0.5334104525105708
分析水Mad:1.049110275675087
灰分:-0.47087329582498033
挥发分:-0.09779320575303108
碳:0.8104530679119436
氢:0.6541821436278573
氮:-0.13529073464861613
硫:0.11133231819862932
氧:0.9365424234738452
碳化温度(℃):1.3646667632508422
升温速率(℃/min):-0.05147059728009279
保温时间(h):0.8081640623452375
KOH:0.8825591062288682
K2CO3:-0.46868652599186134
In [32]:
import lightgbm as lgb
# 创建 LightGBM 数据集
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# 设置参数
params = {
    'objective': 'regression',  # 回归问题
    'boosting_type': 'gbdt',     # 使用梯度提升
    'metric': 'rmse',            # 使用均方根误差作为评估指标
    'num_leaves': 2,            # 每棵树的叶子节点数
    'learning_rate': 0.01,       # 学习率
}
# 训练模型
#定义callback回调
callback=[lgb.early_stopping(stopping_rounds=10,verbose=True),
          lgb.log_evaluation(period=10,show_stdv=True)]
# 训练 train
m1 = lgb.train(params,train_data,num_boost_round=200,
               valid_sets=[train_data,test_data],callbacks=callback)

# 进行预测
y_pred = m1.predict(X_test, num_iteration=m1.best_iteration)
get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 112
[LightGBM] [Info] Number of data points in the train set: 58, number of used features: 11
[LightGBM] [Info] Start training from score 1.226647
Training until validation scores don't improve for 10 rounds
[10]	training's rmse: 0.638817	valid_1's rmse: 0.579195
[20]	training's rmse: 0.62939	valid_1's rmse: 0.5711
[30]	training's rmse: 0.621342	valid_1's rmse: 0.567154
[40]	training's rmse: 0.614175	valid_1's rmse: 0.565478
[50]	training's rmse: 0.607441	valid_1's rmse: 0.562633
[60]	training's rmse: 0.601112	valid_1's rmse: 0.560704
[70]	training's rmse: 0.595164	valid_1's rmse: 0.558691
[80]	training's rmse: 0.589578	valid_1's rmse: 0.556304
[90]	training's rmse: 0.584311	valid_1's rmse: 0.55425
[100]	training's rmse: 0.579347	valid_1's rmse: 0.552013
[110]	training's rmse: 0.574669	valid_1's rmse: 0.550285
[120]	training's rmse: 0.570262	valid_1's rmse: 0.547892
[130]	training's rmse: 0.566114	valid_1's rmse: 0.546906
[140]	training's rmse: 0.562209	valid_1's rmse: 0.544828
[150]	training's rmse: 0.558535	valid_1's rmse: 0.543634
[160]	training's rmse: 0.555081	valid_1's rmse: 0.542251
[170]	training's rmse: 0.551832	valid_1's rmse: 0.54127
[180]	training's rmse: 0.54878	valid_1's rmse: 0.540094
[190]	training's rmse: 0.545912	valid_1's rmse: 0.539304
[200]	training's rmse: 0.543218	valid_1's rmse: 0.538589
Did not meet early stopping. Best iteration is:
[200]	training's rmse: 0.543218	valid_1's rmse: 0.538589
Mean Squared Error: 0.29007798664843815
Root Mean Squared Error: 0.5385888846313468
Mean Absolute Error: 0.4479232290957567
Coefficient of Determination (R²): -0.026751034308880817
In [33]:
plt.figure(figsize=(12,6))
lgb.plot_importance(m1, max_num_features=30)
plt.title("Featurertances")
plt.show()
<Figure size 1200x600 with 0 Axes>
No description has been provided for this image

微孔体积cm3/g)

In [34]:
X = data_3[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']]
y = data_3['微孔体积cm3/g)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
In [35]:
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(X_train, y_train)
test_pred = clf.predict(X_test)
In [36]:
get_acc(y_test,test_pred)
Mean Squared Error: 0.058644300055131
Root Mean Squared Error: 0.24216585237215218
Mean Absolute Error: 0.1869146665913017
Coefficient of Determination (R²): 0.6014531524700744
In [37]:
coefficients = clf.coef_
key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']
for index,i in enumerate(coefficients):
    print(key[index] + ':' + str(i))
煤种:0.038009384028790595
分析水Mad:0.5270239540332698
灰分:-0.2935950026944355
挥发分:-0.47505236069221957
碳:0.7925565775686018
氢:1.0226097972268275
氮:-0.2164263814475165
硫:-0.187314647621951
氧:0.4324238526226014
碳化温度(℃):0.5946628823386292
升温速率(℃/min):-0.033400473160046816
保温时间(h):0.27469886155258133
KOH:0.6542891650386291
K2CO3:-0.02072386352158367
In [38]:
import lightgbm as lgb
# 创建 LightGBM 数据集
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# 设置参数
params = {
    'objective': 'regression',  # 回归问题
    'boosting_type': 'gbdt',     # 使用梯度提升
    'metric': 'rmse',            # 使用均方根误差作为评估指标
    'num_leaves': 2,            # 每棵树的叶子节点数
    'learning_rate': 0.01,       # 学习率
}
# 训练模型
#定义callback回调
callback=[lgb.early_stopping(stopping_rounds=10,verbose=True),
          lgb.log_evaluation(period=10,show_stdv=True)]
# 训练 train
m1 = lgb.train(params,train_data,num_boost_round=200,
               valid_sets=[train_data,test_data],callbacks=callback)

# 进行预测
y_pred = m1.predict(X_test, num_iteration=m1.best_iteration)
get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 120
[LightGBM] [Info] Number of data points in the train set: 57, number of used features: 11
[LightGBM] [Info] Start training from score 0.730716
Training until validation scores don't improve for 10 rounds
[10]	training's rmse: 0.396872	valid_1's rmse: 0.372902
[20]	training's rmse: 0.389406	valid_1's rmse: 0.360807
[30]	training's rmse: 0.383114	valid_1's rmse: 0.351237
[40]	training's rmse: 0.377736	valid_1's rmse: 0.342674
[50]	training's rmse: 0.373147	valid_1's rmse: 0.335013
[60]	training's rmse: 0.369238	valid_1's rmse: 0.32816
[70]	training's rmse: 0.365907	valid_1's rmse: 0.322454
[80]	training's rmse: 0.363019	valid_1's rmse: 0.317971
[90]	training's rmse: 0.360347	valid_1's rmse: 0.314376
[100]	training's rmse: 0.357855	valid_1's rmse: 0.31094
[110]	training's rmse: 0.355528	valid_1's rmse: 0.30764
[120]	training's rmse: 0.353358	valid_1's rmse: 0.304501
[130]	training's rmse: 0.351336	valid_1's rmse: 0.301502
[140]	training's rmse: 0.349451	valid_1's rmse: 0.298626
[150]	training's rmse: 0.347695	valid_1's rmse: 0.295886
[160]	training's rmse: 0.34606	valid_1's rmse: 0.293263
[170]	training's rmse: 0.344537	valid_1's rmse: 0.29076
[180]	training's rmse: 0.34312	valid_1's rmse: 0.288368
[190]	training's rmse: 0.341801	valid_1's rmse: 0.286156
[200]	training's rmse: 0.340544	valid_1's rmse: 0.283861
Did not meet early stopping. Best iteration is:
[200]	training's rmse: 0.340544	valid_1's rmse: 0.283861
Mean Squared Error: 0.0805769145909526
Root Mean Squared Error: 0.2838607309772745
Mean Absolute Error: 0.2001926545731756
Coefficient of Determination (R²): 0.4523990351368772
In [39]:
plt.figure(figsize=(12,6))
lgb.plot_importance(m1, max_num_features=30)
plt.title("Featurertances")
plt.show()
<Figure size 1200x600 with 0 Axes>
No description has been provided for this image

介孔体积cm3/g)

In [40]:
X = data_4[['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']]
y = data_4['介孔体积cm3/g)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
In [41]:
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(X_train, y_train)
test_pred = clf.predict(X_test)
In [42]:
get_acc(y_test,test_pred)
Mean Squared Error: 0.633360271830096
Root Mean Squared Error: 0.7958393505162308
Mean Absolute Error: 0.5042366422580364
Coefficient of Determination (R²): -1.5081213582093067
In [43]:
coefficients = clf.coef_
key = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',
       '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']
for index,i in enumerate(coefficients):
    print(key[index] + ':' + str(i))
煤种:-0.5607084300067899
分析水Mad:0.4984214658264311
灰分:-0.14117597792825254
挥发分:0.7366654205853167
碳:-8.993414025590598
氢:-3.3280548911300323
氮:-0.7426776489865452
硫:-0.30171686040116197
氧:-4.8875223095614375
碳化温度(℃):0.6126040745268642
升温速率(℃/min):-0.07211666342106407
保温时间(h):0.23227893599959817
KOH:0.32346263890721333
K2CO3:-0.30227894335703315
In [44]:
import lightgbm as lgb
# 创建 LightGBM 数据集
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# 设置参数
params = {
    'objective': 'regression',  # 回归问题
    'boosting_type': 'gbdt',     # 使用梯度提升
    'metric': 'rmse',            # 使用均方根误差作为评估指标
    'num_leaves': 2,            # 每棵树的叶子节点数
    'learning_rate': 0.01,       # 学习率
}
# 训练模型
#定义callback回调
callback=[lgb.early_stopping(stopping_rounds=10,verbose=True),
          lgb.log_evaluation(period=10,show_stdv=True)]
# 训练 train
m1 = lgb.train(params,train_data,num_boost_round=200,
               valid_sets=[train_data,test_data],callbacks=callback)

# 进行预测
y_pred = m1.predict(X_test, num_iteration=m1.best_iteration)
get_acc(y_test,y_pred)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 48, number of used features: 9
[LightGBM] [Info] Start training from score 0.524919
Training until validation scores don't improve for 10 rounds
[10]	training's rmse: 0.428728	valid_1's rmse: 0.487862
[20]	training's rmse: 0.417581	valid_1's rmse: 0.477524
[30]	training's rmse: 0.407814	valid_1's rmse: 0.470645
[40]	training's rmse: 0.399242	valid_1's rmse: 0.464887
[50]	training's rmse: 0.391738	valid_1's rmse: 0.460082
[60]	training's rmse: 0.385183	valid_1's rmse: 0.456108
[70]	training's rmse: 0.379467	valid_1's rmse: 0.452842
[80]	training's rmse: 0.374493	valid_1's rmse: 0.450179
[90]	training's rmse: 0.370172	valid_1's rmse: 0.448034
[100]	training's rmse: 0.366423	valid_1's rmse: 0.446322
[110]	training's rmse: 0.363176	valid_1's rmse: 0.444977
[120]	training's rmse: 0.360366	valid_1's rmse: 0.443942
[130]	training's rmse: 0.357934	valid_1's rmse: 0.443339
[140]	training's rmse: 0.35582	valid_1's rmse: 0.443088
[150]	training's rmse: 0.353985	valid_1's rmse: 0.44256
[160]	training's rmse: 0.352391	valid_1's rmse: 0.442537
[170]	training's rmse: 0.350995	valid_1's rmse: 0.442414
Early stopping, best iteration is:
[167]	training's rmse: 0.351395	valid_1's rmse: 0.442179
Mean Squared Error: 0.19552236767207556
Root Mean Squared Error: 0.44217911265919785
Mean Absolute Error: 0.30130148562563
Coefficient of Determination (R²): 0.22572689166468962
In [45]:
plt.figure(figsize=(12,6))
lgb.plot_importance(m1, max_num_features=30)
plt.title("Featurertances")
plt.show()
<Figure size 1200x600 with 0 Axes>
No description has been provided for this image