45 KiB
45 KiB
In [1]:
import os os.environ['CUDA_DEVICE_ORDER'] = 'PCB_BUS_ID' os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'
In [2]:
import pandas as pd import numpy as np from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt #新增加的两行 from pylab import mpl # 设置显示中文字体 mpl.rcParams["font.sans-serif"] = ["SimHei"] mpl.rcParams["axes.unicode_minus"] = False
In [3]:
data = pd.read_excel('./data/20240123/煤沥青数据.xlsx') data.head()
Out[3]:
碳源 | 共碳化物质 | 共碳化物/煤沥青 | 加热次数 | 是否有碳化过程 | 模板剂种类 | 模板剂比例 | KOH与煤沥青比例 | 活化温度 | 升温速率 | 活化时间 | 混合方式 | 比表面积 | 总孔体积 | 微孔体积 | 平均孔径 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 1.0 | 500 | 5 | 2.0 | 溶剂 | 908.07 | 0.40 | 0.34 | 1.75 |
1 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 0.5 | 600 | 5 | 2.0 | 溶剂 | 953.95 | 0.66 | 0.35 | 2.76 |
2 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 1.0 | 600 | 5 | 2.0 | 溶剂 | 1388.62 | 0.61 | 0.53 | 1.77 |
3 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 2.0 | 600 | 5 | 2.0 | 溶剂 | 1444.63 | 0.59 | 0.55 | 1.62 |
4 | 煤沥青 | 无 | 0.0 | 2 | 是 | 自制碱式碳酸镁 | 1.0 | 1.0 | 600 | 5 | 2.0 | 溶剂 | 1020.99 | 0.45 | 0.35 | 1.77 |
In [4]:
data.drop(columns=['碳源'], inplace=True)
In [5]:
object_cols = ['共碳化物质', '是否有碳化过程', '模板剂种类', '混合方式']
In [6]:
data_0102 = pd.get_dummies(data, columns=object_cols)
In [7]:
out_cols = ['比表面积', '总孔体积', '微孔体积', '平均孔径'] feature_cols = [x for x in data_0102.columns if x not in out_cols] feature_cols
Out[7]:
['共碳化物/煤沥青', '加热次数', '模板剂比例', 'KOH与煤沥青比例', '活化温度', '升温速率', '活化时间', '共碳化物质_2-甲基咪唑', '共碳化物质_三聚氰胺', '共碳化物质_尿素', '共碳化物质_无', '共碳化物质_硫酸铵', '共碳化物质_聚磷酸铵', '是否有碳化过程_否', '是否有碳化过程_是', '模板剂种类_Al2O3', '模板剂种类_TiO2', '模板剂种类_α-Fe2O3', '模板剂种类_γ-Fe2O3', '模板剂种类_二氧化硅', '模板剂种类_无', '模板剂种类_氯化钾', '模板剂种类_纤维素', '模板剂种类_自制氢氧化镁', '模板剂种类_自制氧化钙', '模板剂种类_自制氧化锌', '模板剂种类_自制氧化镁', '模板剂种类_自制碱式碳酸镁', '模板剂种类_购买氢氧化镁', '模板剂种类_购买氧化钙', '模板剂种类_购买氧化锌', '模板剂种类_购买氧化镁', '模板剂种类_购买氯化钠', '模板剂种类_购买碳酸钙', '混合方式_溶剂', '混合方式_研磨']
In [8]:
out_cols
Out[8]:
['比表面积', '总孔体积', '微孔体积', '平均孔径']
In [9]:
import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers import tensorflow.keras.backend as K
2024-01-23 12:21:49.081644: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2024-01-23 12:21:49.083823: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. 2024-01-23 12:21:49.125771: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. 2024-01-23 12:21:49.126872: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-01-23 12:21:50.338510: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [10]:
from tensorflow.keras import Model
In [12]:
from tensorflow.keras.initializers import Constant
In [13]:
def get_prediction_model(): inputs = layers.Input(shape=(1, len(feature_cols)), name='input') x = layers.Conv1D(filters=64, kernel_size=1, activation='relu')(inputs) # x = layers.Dropout(rate=0.1)(x) lstm_out = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x) lstm_out = layers.Dense(128, activation='relu')(lstm_out) # transformer_block = TransformerBlock(128, num_heads, ff_dim, name='first_attn') # out = transformer_block(lstm_out) # out = layers.GlobalAveragePooling1D()(out) out = layers.Dropout(0.1)(lstm_out) out = layers.Dense(64, activation='relu')(out) bet = layers.Dense(1, activation='sigmoid', name='vad')(out) model = Model(inputs=[inputs], outputs=[bet]) return model
In [14]:
model = get_prediction_model() model.summary()
2024-01-23 12:22:03.707721: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_INVALID_DEVICE: invalid device ordinal
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input (InputLayer) [(None, 1, 36)] 0 conv1d (Conv1D) (None, 1, 64) 2368 bidirectional (Bidirection (None, 1, 128) 66048 al) dense (Dense) (None, 1, 128) 16512 dropout (Dropout) (None, 1, 128) 0 dense_1 (Dense) (None, 1, 64) 8256 vad (Dense) (None, 1, 1) 65 ================================================================= Total params: 93249 (364.25 KB) Trainable params: 93249 (364.25 KB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
In [15]:
from tensorflow.python.keras.utils.vis_utils import plot_model
In [16]:
train_data = data_0102.copy()
In [17]:
maxs = train_data.max() mins = train_data.min() for col in train_data.columns: if maxs[col] - mins[col] == 0: continue train_data[col] = (train_data[col] - mins[col]) / (maxs[col] - mins[col])
In [18]:
train_data
Out[18]:
共碳化物/煤沥青 | 加热次数 | 模板剂比例 | KOH与煤沥青比例 | 活化温度 | 升温速率 | 活化时间 | 比表面积 | 总孔体积 | 微孔体积 | ... | 模板剂种类_自制氧化镁 | 模板剂种类_自制碱式碳酸镁 | 模板剂种类_购买氢氧化镁 | 模板剂种类_购买氧化钙 | 模板剂种类_购买氧化锌 | 模板剂种类_购买氧化镁 | 模板剂种类_购买氯化钠 | 模板剂种类_购买碳酸钙 | 混合方式_溶剂 | 混合方式_研磨 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 0.0 | 0.1 | 0.066667 | 0.000000 | 0.3 | 0.333333 | 0.273437 | 0.137628 | 0.270767 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
1 | 0.0 | 0.0 | 0.1 | 0.033333 | 0.166667 | 0.3 | 0.333333 | 0.287345 | 0.229145 | 0.278754 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
2 | 0.0 | 0.0 | 0.1 | 0.066667 | 0.166667 | 0.3 | 0.333333 | 0.419103 | 0.211545 | 0.422524 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
3 | 0.0 | 0.0 | 0.1 | 0.133333 | 0.166667 | 0.3 | 0.333333 | 0.436081 | 0.204505 | 0.438498 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
4 | 0.0 | 1.0 | 0.1 | 0.066667 | 0.166667 | 0.3 | 0.333333 | 0.307666 | 0.155227 | 0.278754 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
144 | 0.0 | 0.0 | 0.0 | 0.266667 | 0.166667 | 0.3 | 0.000000 | 0.592301 | 0.331221 | 0.638179 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
145 | 0.0 | 0.0 | 0.0 | 0.266667 | 0.333333 | 0.3 | 0.000000 | 0.843589 | 0.472017 | 0.941693 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
146 | 0.0 | 0.0 | 0.0 | 0.266667 | 0.500000 | 0.3 | 0.000000 | 0.682631 | 0.376980 | 0.781949 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
147 | 0.0 | 0.0 | 0.0 | 0.200000 | 0.333333 | 0.3 | 0.000000 | 0.569567 | 0.292503 | 0.614217 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
148 | 0.0 | 0.0 | 0.0 | 0.333333 | 0.333333 | 0.3 | 0.000000 | 0.776902 | 0.394579 | 0.885783 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
149 rows × 40 columns
In [19]:
from sklearn.model_selection import KFold, train_test_split
In [20]:
from tensorflow.keras import optimizers from tensorflow.python.keras.utils.vis_utils import plot_model from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
In [21]:
from keras.callbacks import ReduceLROnPlateau reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')
In [22]:
def print_eva(y_true, y_pred, tp): MSE = mean_squared_error(y_true, y_pred) RMSE = np.sqrt(MSE) MAE = mean_absolute_error(y_true, y_pred) MAPE = mean_absolute_percentage_error(y_true, y_pred) R_2 = r2_score(y_true, y_pred) print(f"COL: {tp}, MSE: {format(MSE, '.2E')}", end=',') print(f'RMSE: {round(RMSE, 3)}', end=',') print(f'MAPE: {round(MAPE * 100, 3)} %', end=',') print(f'MAE: {round(MAE, 3)}', end=',') print(f'R_2: {round(R_2, 3)}') return [MSE, RMSE, MAE, MAPE, R_2]
In [23]:
from keras.losses import mean_squared_error
In [25]:
for pred_col in out_cols: print(pred_col) use_cols = feature_cols + [pred_col] use_data = train_data[use_cols].dropna().reset_index(drop=True) kf = KFold(n_splits=5, shuffle=True, random_state=42) vad_eva_list = list() for (train_index, test_index) in kf.split(use_data): train = use_data.loc[train_index] valid = use_data.loc[test_index] X = np.expand_dims(train[feature_cols].values, axis=1) Y = train[pred_col].values.T X_valid = np.expand_dims(valid[feature_cols].values, axis=1) Y_valid = valid[pred_col].values.T prediction_model = get_prediction_model() prediction_model.compile(optimizer='adam', loss=mean_squared_error) hist = prediction_model.fit(X, Y, epochs=120, batch_size=8, verbose=0, validation_data=(X_valid, Y_valid), callbacks=[reduce_lr] ) rst = prediction_model.predict(X_valid) pred_rst = rst * (maxs[pred_col] - mins[pred_col]) + mins[pred_col] real_rst = valid[pred_col] * (maxs[pred_col] - mins[pred_col]) + mins[pred_col] y_pred_vad = pred_rst.reshape(-1,) y_true_vad = real_rst.values.reshape(-1,) vad_eva = print_eva(y_true_vad, y_pred_vad, tp=pred_col) vad_eva_list.append(vad_eva) del prediction_model
比表面积 1/1 [==============================] - 1s 588ms/step COL: 比表面积, MSE: 6.12E+05,RMSE: 782.4299926757812,MAPE: 409.191 %,MAE: 622.412,R_2: 0.034 1/1 [==============================] - 1s 546ms/step COL: 比表面积, MSE: 4.94E+05,RMSE: 703.1320190429688,MAPE: 400.892 %,MAE: 560.341,R_2: 0.019 1/1 [==============================] - 1s 576ms/step COL: 比表面积, MSE: 7.66E+05,RMSE: 875.4010009765625,MAPE: 807.376 %,MAE: 735.814,R_2: 0.043 1/1 [==============================] - 1s 1s/step COL: 比表面积, MSE: 6.41E+05,RMSE: 800.6019897460938,MAPE: 1664.653 %,MAE: 621.767,R_2: -0.055 WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fdc106a8b80> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/1 [==============================] - 1s 571ms/step COL: 比表面积, MSE: 6.27E+05,RMSE: 791.9520263671875,MAPE: 756.572 %,MAE: 627.752,R_2: -0.022 总孔体积 WARNING:tensorflow:6 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7fdb30784b80> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. 1/1 [==============================] - 1s 540ms/step COL: 总孔体积, MSE: 2.80E-01,RMSE: 0.5289999842643738,MAPE: 313.778 %,MAE: 0.428,R_2: 0.153 1/1 [==============================] - 1s 574ms/step COL: 总孔体积, MSE: 1.50E-01,RMSE: 0.3880000114440918,MAPE: 381.536 %,MAE: 0.32,R_2: 0.016 1/1 [==============================] - 1s 556ms/step COL: 总孔体积, MSE: 2.93E-01,RMSE: 0.5419999957084656,MAPE: 166.29 %,MAE: 0.384,R_2: 0.104 1/1 [==============================] - 1s 540ms/step COL: 总孔体积, MSE: 3.27E-01,RMSE: 0.5720000267028809,MAPE: 428.563 %,MAE: 0.426,R_2: 0.124 1/1 [==============================] - 1s 557ms/step COL: 总孔体积, MSE: 3.52E-01,RMSE: 0.5929999947547913,MAPE: 333.533 %,MAE: 0.405,R_2: 0.12 微孔体积 1/1 [==============================] - 1s 541ms/step COL: 微孔体积, MSE: 7.01E-02,RMSE: 0.26499998569488525,MAPE: 597.598 %,MAE: 0.212,R_2: 0.031 1/1 [==============================] - 1s 544ms/step COL: 微孔体积, MSE: 1.05E-01,RMSE: 0.3240000009536743,MAPE: 1788.931 %,MAE: 0.273,R_2: 0.088 1/1 [==============================] - 1s 553ms/step COL: 微孔体积, MSE: 9.10E-02,RMSE: 0.3019999861717224,MAPE: 3142.796 %,MAE: 0.238,R_2: 0.033 1/1 [==============================] - 1s 559ms/step COL: 微孔体积, MSE: 1.26E-01,RMSE: 0.35600000619888306,MAPE: 1105.4 %,MAE: 0.298,R_2: 0.065 1/1 [==============================] - 1s 681ms/step COL: 微孔体积, MSE: 9.03E-02,RMSE: 0.3009999990463257,MAPE: 466.473 %,MAE: 0.226,R_2: 0.026 平均孔径 1/1 [==============================] - 1s 612ms/step COL: 平均孔径, MSE: 1.35E+00,RMSE: 1.159999966621399,MAPE: 30.229 %,MAE: 0.837,R_2: 0.108 1/1 [==============================] - 1s 560ms/step COL: 平均孔径, MSE: 2.38E+00,RMSE: 1.5420000553131104,MAPE: 37.616 %,MAE: 1.076,R_2: 0.095 1/1 [==============================] - 1s 571ms/step COL: 平均孔径, MSE: 2.05E+00,RMSE: 1.4329999685287476,MAPE: 32.263 %,MAE: 0.996,R_2: 0.03 1/1 [==============================] - 1s 530ms/step COL: 平均孔径, MSE: 1.42E+00,RMSE: 1.1920000314712524,MAPE: 33.395 %,MAE: 0.899,R_2: 0.221 1/1 [==============================] - 1s 532ms/step COL: 平均孔径, MSE: 1.58E+00,RMSE: 1.2589999437332153,MAPE: 33.747 %,MAE: 0.931,R_2: 0.009
In [ ]:
vad_df = pd.DataFrame.from_records(vad_eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']) vad_df.sort_values(by='R_2').mean()
In [52]:
fcad_df = pd.DataFrame.from_records(fcad_eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']) fcad_df.sort_values(by='R_2').mean()
Out[52]:
MSE 0.317628 RMSE 0.557178 MAE 0.412263 MAPE 0.007993 R_2 0.986373 dtype: float64
In [ ]:
train, valid = train_test_split(use_data[use_cols], test_size=0.3, random_state=42, shuffle=True) valid, test = train_test_split(valid, test_size=0.3, random_state=42, shuffle=True)
In [31]:
prediction_model = get_prediction_model() trainable_model = get_trainable_model(prediction_model)
In [34]:
X = np.expand_dims(train[feature_cols].values, axis=1) Y = [x for x in train[out_cols].values.T] Y_valid = [x for x in valid[out_cols].values.T]
In [ ]:
trainable_model.compile(optimizer='adam', loss=None) hist = trainable_model.fit([X, Y[0], Y[1]], epochs=120, batch_size=8, verbose=1, validation_data=[np.expand_dims(valid[feature_cols].values, axis=1), Y_valid[0], Y_valid[1]], callbacks=[reduce_lr] )
In [41]:
rst = prediction_model.predict(np.expand_dims(test[feature_cols], axis=1)) rst
Out[41]:
[array([[0.73740077], [0.89292204], [0.7599046 ], [0.67802393], [0.6815233 ], [0.88627005], [0.6121343 ], [0.7072234 ], [0.8561135 ], [0.52762157], [0.8325021 ], [0.50241977], [0.8242289 ], [0.68957335], [0.6980361 ], [0.82116604], [0.8566438 ], [0.53687835], [0.56832707], [0.78476715], [0.85638577]], dtype=float32), array([[0.68600863], [0.78454906], [0.8179163 ], [0.94351083], [0.86383885], [0.69705516], [0.6913491 ], [0.80277354], [0.93557894], [0.82278305], [0.82674253], [0.93518937], [0.8094449 ], [0.9206344 ], [0.7747319 ], [0.9137207 ], [0.9491073 ], [0.93225 ], [0.6185102 ], [0.8867341 ], [0.82890105]], dtype=float32)]
In [42]:
[np.exp(K.get_value(log_var[0]))**0.5 for log_var in trainable_model.layers[-1].log_vars]
Out[42]:
[0.9991559102070927, 0.9998196796918477]
In [46]:
real_rst.columns
Out[46]:
Index(['挥发分Vad(%)', '固定炭Fcad(%)'], dtype='object')
In [ ]:
for col in out_cols: pred_rst[col] = pred_rst[col] * (maxs[col] - mins[col]) + mins[col] real_rst[col] = real_rst[col] * (maxs[col] - mins[col]) + mins[col]
In [47]:
y_pred_vad = pred_rst['挥发分Vad(%)'].values.reshape(-1,) y_pred_fcad = pred_rst['固定炭Fcad(%)'].values.reshape(-1,) y_true_vad = real_rst['挥发分Vad(%)'].values.reshape(-1,) y_true_fcad = real_rst['固定炭Fcad(%)'].values.reshape(-1,)
In [56]:
pm25_eva = print_eva(y_true_vad, y_pred_vad, tp='挥发分Vad') pm10_eva = print_eva(y_true_fcad, y_pred_fcad, tp='固定炭Fcad')
COL: 挥发分Vad, MSE: 3.35E-01,RMSE: 0.579,MAPE: 1.639 %,MAE: 0.504,R_2: 0.87 COL: 固定炭Fcad, MSE: 1.11E+00,RMSE: 1.055,MAPE: 1.497 %,MAE: 0.814,R_2: 0.876
In [ ]:
In [ ]: