coal_materials/未命名.ipynb

964 KiB
Raw Permalink Blame History

In [1]:
from statistics import mean
import matplotlib.pyplot as plt
from sklearn.metrics import explained_variance_score,r2_score,median_absolute_error,mean_squared_error,mean_absolute_error
from scipy import stats
import numpy as np
# Global matplotlib defaults for every figure in this notebook.
plt.rcParams.update({
    "font.sans-serif": ["SimHei"],  # font used for sans-serif text (the labels below are Chinese)
    "font.size": 16,                # base font size
    "axes.unicode_minus": False,    # render the minus sign correctly with this font
})
In [20]:
def scatter_out_1(x, y, axis_max=35, title="总孔体积拟合结果 $10^2 cm^3$", save_path='./总孔体积.png'):
    """Print agreement metrics for two series and draw a density-coloured scatter plot.

    ``x`` is plotted on the horizontal axis (labelled as the predicted value) and
    ``y`` on the vertical axis (labelled as the true value). The figure also shows
    the 1:1 line, the least-squares regression line of ``y`` on ``x`` and a text
    box with N, R^2, BIAS and RMSE.

    Parameters
    ----------
    x : array-like
        Predicted values (1-D, numeric).
    y : array-like
        True / measured values, same length as ``x``.
    axis_max : float, optional
        Upper limit of both axes; the axes span ``[0, axis_max]``. Default 35.
    title : str, optional
        Figure title.
    save_path : str or None, optional
        Where the figure is written. Pass ``None`` to skip saving.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` are empty or differ in length.
    """
    # Work on plain float arrays so that positional indexing (x[idx]) below is
    # well defined even when a pandas Series or a list is passed in.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if x.shape != y.shape:
        raise ValueError("x and y must have the same length")
    if x.size == 0:
        raise ValueError("x and y must not be empty")

    # ========== Evaluation metrics ==========
    # scikit-learn metrics take (y_true, y_pred). x is the prediction and y the
    # truth, so y goes first; R^2 and explained variance are not symmetric.
    BIAS = float(np.mean(x - y))  # mean of (predicted - true)
    MSE = mean_squared_error(y, x)
    RMSE = np.sqrt(MSE)
    R2 = r2_score(y, x)
    MAE = mean_absolute_error(y, x)
    EV = explained_variance_score(y, x)
    print('==========算法评价指标==========')
    print('BIAS:', '%.3f' % (BIAS))
    print('Explained Variance(EV):', '%.3f' % (EV))
    print('Mean Absolute Error(MAE):', '%.3f' % (MAE))
    print('Mean squared error(MSE):', '%.3f' % (MSE))
    print('Root Mean Squard Error(RMSE):', '%.3f' % (RMSE))
    print('R_squared:', '%.3f' % (R2))

    # ========== Point density (Gaussian KDE evaluated at every point) ==========
    xy = np.vstack([x, y])
    z = stats.gaussian_kde(xy)(xy)
    # Sort by density so that the densest points are drawn last (on top).
    idx = z.argsort()
    x, y, z = x[idx], y[idx], z[idx]

    # ========== Least-squares line y = m * x + b ==========
    x_mean, y_mean = x.mean(), y.mean()
    m = (x_mean * y_mean - np.mean(x * y)) / (x_mean * x_mean - np.mean(x * x))
    b = y_mean - m * x_mean
    regression_line = m * x + b

    # ========== Figure ==========
    fig, ax = plt.subplots(figsize=(12, 9), dpi=600)
    scatter = ax.scatter(x, y, marker='o', c=z, edgecolors='b', s=15, label='LST', cmap='Spectral_r')
    fig.colorbar(scatter, ax=ax, shrink=1, orientation='vertical', extend='both',
                 pad=0.015, aspect=30, label='frequency')
    ax.plot([0, axis_max], [0, axis_max], 'black', lw=1.5)  # 1:1 reference line
    ax.plot(x, regression_line, 'red', lw=1.5)              # regression line of y on x
    ax.axis([0, axis_max, 0, axis_max])                     # both axes span [0, axis_max]
    ax.set_title(title, fontdict={"fontsize": 16})
    ax.set_xlabel('预测值', fontdict={"fontsize": 16})
    ax.set_ylabel('真实值', fontdict={"fontsize": 16})

    # Annotation positions scale with the axis range; with the default
    # axis_max=35 they are x=0.5 and y=34, 33, 32, 31.
    text_x = axis_max * 0.5 / 35
    annotations = [
        '$N=%.f$' % len(y),
        '$R^2=%.3f$' % R2,
        '$BIAS=%.4f$' % BIAS,
        '$RMSE=%.3f$' % RMSE,
    ]
    for row, label in enumerate(annotations):
        ax.text(text_x, axis_max * (34 - row) / 35, label, fontdict={"fontsize": 16})

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches='tight', pad_inches=0)
    plt.show()
In [21]:
import pandas as pd
In [22]:
# Load the results table; later cells read its '预测值' (predicted) and '真实值' (true) columns.
df = pd.read_csv('./rst/总孔体积_比表.csv')
In [23]:
# Summary statistics of the numeric columns, shown as the cell output.
df.describe()
Out[23]:
真实值 预测值
count 184.000000 184.000000
mean 267.871020 272.776239
std 696.475264 693.395059
min 0.060000 0.069085
25% 0.539250 0.570501
50% 0.877000 0.889113
75% 1.673250 1.551479
max 3322.000000 3225.575700
In [24]:
# First argument is the predicted column (x-axis), second the true column (y-axis).
# Both are divided by 100, matching the 10^2 factor shown in the plot title.
scatter_out_1(df['预测值'].values/100, df['真实值'].values/100)
==========算法评价指标==========
BIAS: 0.049
Explained Variance(EV): 0.921
Mean Absolute Error(MAE): 0.622
Mean squared error(MSE): 3.797
Root Mean Squard Error(RMSE): 1.949
R_squared: 0.921
No description has been provided for this image
In [12]:
# Compare predicted and true values on a random subset of rows.
# A seeded generator keeps the figure reproducible across kernel restarts, and
# row *positions* are sampled so they match the positional .iloc lookup below
# (sampling index labels only works when df has the default RangeIndex).
n_samples = min(50, len(df))  # avoid a ValueError when df has fewer than 50 rows
rng = np.random.default_rng(42)
samples = rng.choice(len(df), size=n_samples, replace=False)
subset = df.iloc[samples]
positions = range(n_samples)

plt.figure(figsize=(12, 9))
plt.plot(positions, subset['预测值'].values, 'o-', label='预测值')
plt.plot(positions, subset['真实值'].values, '*-', label='真实值')
# The x-axis is the ordinal of the sampled row, not the predicted value.
plt.xlabel('样本序号', fontdict={"fontsize":16})
# NOTE(review): both series share this axis, and the values are not divided by
# 100 here although the label states a 10^2 unit -- confirm label and unit.
plt.ylabel('真实值 $(10^2 cm^3/g)$', fontdict={"fontsize":16})
# NOTE(review): the title names nitrogen adsorption while the loaded CSV is
# named after total pore volume -- confirm which quantity is intended.
plt.title('氮气吸附量拟合结果')
plt.legend(loc='best')
plt.show()
No description has been provided for this image
In [ ]: