{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "85efd702", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:18:58.543687Z", "start_time": "2023-10-16T03:18:57.501238Z" } }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "id": "f033dbeb", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:18:58.763265Z", "start_time": "2023-10-16T03:18:58.746711Z" } }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "plt.rcParams[\"font.sans-serif\"]=[\"SimHei\"] #设置字体\n", "plt.rcParams[\"axes.unicode_minus\"]=False #该语句解决图像中的“-”负号的乱码问题" ] }, { "cell_type": "code", "execution_count": 3, "id": "a591643d", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:18:59.273154Z", "start_time": "2023-10-16T03:18:59.262015Z" } }, "outputs": [], "source": [ "data_path = \"./data/煤质碳材料数据.xlsx\"" ] }, { "cell_type": "code", "execution_count": 4, "id": "92e9b814", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:00.156694Z", "start_time": "2023-10-16T03:18:59.835913Z" } }, "outputs": [], "source": [ "data = pd.read_excel(data_path)" ] }, { "cell_type": "code", "execution_count": 5, "id": "22f0399a", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:01.192314Z", "start_time": "2023-10-16T03:19:01.160808Z" } }, "outputs": [], "source": [ "# 缺失值填充\n", "## 根据煤种分类,将缺失值进行补全,采用均值填充\n", "## 用不上,因为分析数据,X值缺失的时候,Y也会缺失\n", "## 萃取中级烟煤还只有一条数据,无法参考补全\n", "## 直接删除X缺失数据\n", "\n", "# 补全:\n", "# grouped = data.groupby('煤种')\n", "# # 定义填充函数\n", "# def fill_with_mean(group, name):\n", "# group[name].fillna(group[name].mean(), inplace=True)\n", "# return group\n", "\n", "# # 在每个分组内使用均值法填充空值\n", "# data_filled = grouped.apply(fill_with_mean,\"分析水Mad\")\n", "\n", "# 删除某一列\n", "data_full = data.dropna(axis=0,subset = ['编号', '煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',\n", " '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3'])\n", "del data_full['编号']" ] }, { "cell_type": "code", "execution_count": 1, "id": "2e33f252-d36c-4d29-a565-ad867e218d26", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/root/.cache/matplotlib\n" ] } ], "source": [ "import matplotlib as mpl\n", "print(mpl.get_cachedir())" ] }, { "cell_type": "code", "execution_count": 6, "id": "1150d735", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:01.593771Z", "start_time": "2023-10-16T03:19:01.587725Z" } }, "outputs": [], "source": [ "data_full = data_full.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "f2c58153", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:02.091979Z", "start_time": "2023-10-16T03:19:02.083594Z" } }, "outputs": [], "source": [ "# one-hot 编码处理煤种数据\n", "#from sklearn.preprocessing import OneHotEncoder\n", "# encoder = OneHotEncoder()\n", "# encoded_data = encoder.fit_transform(data_full[['煤种']])\n", "# # 将稀疏矩阵转换为数组\n", "# encoded_array = encoded_data.toarray()\n", "# # 创建编码后的 DataFrame\n", "# encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(['煤种']))\n", "# data_full_one_hot = pd.concat([data_full, encoded_df], axis=1)\n", "# del data_full_one_hot['煤种']" ] }, { "cell_type": "code", "execution_count": 8, "id": "1fbce5b5", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:05.659555Z", "start_time": "2023-10-16T03:19:02.373148Z" } }, "outputs": [], "source": [ "# 字典映射后准备归一化\n", "from sklearn.preprocessing import LabelEncoder\n", "encoder = LabelEncoder()\n", "encoded_labels = encoder.fit_transform(data_full['煤种'])\n", "data_full['煤种'] = encoded_labels" ] }, { "cell_type": "code", "execution_count": 9, "id": "d8bde48c", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:05.674831Z", "start_time": "2023-10-16T03:19:05.661534Z" } }, "outputs": [], "source": [ "# 归一化,数据量太小了,且数据无正态分布一说感觉,直接归一化\n", "\n", "x_col = ['煤种', '分析水Mad', '灰分', '挥发分', '碳', '氢', '氮', '硫', '氧', '碳化温度(℃)',\n", " '升温速率(℃/min)', '保温时间(h)', 'KOH', 'K2CO3']\n", "\n", "y_col = ['孔体积(cm3/g)','微孔体积(cm3/g)', '介孔体积(cm3/g)','BET比表面积(m2/g)']\n", "# from sklearn.preprocessing import StandardScaler\n", "# scaler = StandardScaler()\n", "# normalized_data = scaler.fit_transform(data_full[x_col])\n", "\n", "from sklearn.preprocessing import MinMaxScaler\n", "\n", "scaler = MinMaxScaler()\n", "\n", "normalized_data = scaler.fit_transform(data_full[x_col])\n", "normalized_df = pd.DataFrame(normalized_data, columns=x_col)\n", "data_full_minmax = pd.concat([normalized_df,data_full[y_col]],axis=1)" ] }, { "cell_type": "code", "execution_count": 10, "id": "655874e0", "metadata": { "ExecuteTime": { "end_time": "2023-10-16T03:19:05.705330Z", "start_time": "2023-10-16T03:19:05.675810Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | 煤种 | \n", "分析水Mad | \n", "灰分 | \n", "挥发分 | \n", "碳 | \n", "氢 | \n", "氮 | \n", "硫 | \n", "氧 | \n", "碳化温度(℃) | \n", "升温速率(℃/min) | \n", "保温时间(h) | \n", "KOH | \n", "K2CO3 | \n", "孔体积(cm3/g) | \n", "微孔体积(cm3/g) | \n", "介孔体积(cm3/g) | \n", "BET比表面积(m2/g) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.090909 | \n", "0.040520 | \n", "0.176027 | \n", "0.579416 | \n", "0.897402 | \n", "0.181024 | \n", "0.333333 | \n", "0.000000 | \n", "0.141774 | \n", "1.0 | \n", "0.000000 | \n", "0.6 | \n", "0.0 | \n", "0.0 | \n", "0.270 | \n", "NaN | \n", "NaN | \n", "296.0 | \n", "
1 | \n", "0.727273 | \n", "0.436127 | \n", "0.089271 | \n", "0.755583 | \n", "0.552794 | \n", "0.131548 | \n", "0.245763 | \n", "1.000000 | \n", "0.670623 | \n", "0.1 | \n", "0.285714 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.356 | \n", "0.289 | \n", "0.067 | \n", "665.0 | \n", "
2 | \n", "0.727273 | \n", "0.436127 | \n", "0.089271 | \n", "0.755583 | \n", "0.552794 | \n", "0.131548 | \n", "0.245763 | \n", "1.000000 | \n", "0.670623 | \n", "0.1 | \n", "0.285714 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.608 | \n", "0.482 | \n", "0.126 | \n", "1221.0 | \n", "
3 | \n", "0.727273 | \n", "0.436127 | \n", "0.089271 | \n", "0.755583 | \n", "0.552794 | \n", "0.131548 | \n", "0.245763 | \n", "1.000000 | \n", "0.670623 | \n", "0.1 | \n", "0.285714 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.438 | \n", "0.670 | \n", "0.768 | \n", "2609.0 | \n", "
4 | \n", "0.727273 | \n", "0.436127 | \n", "0.089271 | \n", "0.755583 | \n", "0.552794 | \n", "0.131548 | \n", "0.245763 | \n", "1.000000 | \n", "0.670623 | \n", "0.1 | \n", "0.285714 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.321 | \n", "0.599 | \n", "0.722 | \n", "2323.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
64 | \n", "0.272727 | \n", "0.000000 | \n", "0.085080 | \n", "0.151960 | \n", "0.998131 | \n", "0.096042 | \n", "0.378531 | \n", "0.058989 | \n", "0.000000 | \n", "0.4 | \n", "0.107143 | \n", "0.2 | \n", "1.0 | \n", "0.0 | \n", "1.608 | \n", "1.204 | \n", "0.404 | \n", "3142.0 | \n", "
65 | \n", "0.272727 | \n", "0.000000 | \n", "0.085080 | \n", "0.151960 | \n", "0.998131 | \n", "0.096042 | \n", "0.378531 | \n", "0.058989 | \n", "0.000000 | \n", "0.4 | \n", "0.107143 | \n", "0.2 | \n", "1.0 | \n", "0.0 | \n", "2.041 | \n", "1.022 | \n", "1.019 | \n", "3389.0 | \n", "
66 | \n", "0.272727 | \n", "0.002165 | \n", "0.174560 | \n", "0.137279 | \n", "1.000000 | \n", "0.000000 | \n", "0.457627 | \n", "0.000000 | \n", "0.048797 | \n", "0.2 | \n", "0.107143 | \n", "0.2 | \n", "1.0 | \n", "0.0 | \n", "1.135 | \n", "0.916 | \n", "0.219 | \n", "2542.0 | \n", "
67 | \n", "0.272727 | \n", "0.002165 | \n", "0.174560 | \n", "0.137279 | \n", "1.000000 | \n", "0.000000 | \n", "0.457627 | \n", "0.000000 | \n", "0.048797 | \n", "0.4 | \n", "0.107143 | \n", "0.2 | \n", "1.0 | \n", "0.0 | \n", "1.219 | \n", "0.947 | \n", "0.272 | \n", "2665.0 | \n", "
68 | \n", "0.272727 | \n", "0.002165 | \n", "0.174560 | \n", "0.137279 | \n", "1.000000 | \n", "0.000000 | \n", "0.457627 | \n", "0.000000 | \n", "0.048797 | \n", "0.6 | \n", "0.107143 | \n", "0.2 | \n", "1.0 | \n", "0.0 | \n", "1.473 | \n", "0.718 | \n", "0.755 | \n", "2947.0 | \n", "
69 rows × 18 columns
\n", "