79 lines
2.3 KiB
Python
79 lines
2.3 KiB
Python
|
import pandas as pd
|
|||
|
import numpy as np
|
|||
|
import os
|
|||
|
import sys
|
|||
|
|
|||
|
# Split the pyrolysis workbook into one cleaned CSV per product yield
# (Tar, Gas, Char, Water). Each output keeps only rows with complete feature
# values and a usable value for that target.

# Directory holding the source workbook and receiving the generated CSV files.
DATA_DIR = "D:\\project\\ai_station\\meirejie\\data"


def _select_target_rows(frame, feature_columns, target, label, bounds=None):
    """Return the rows of *frame* that are usable for a single target column.

    Rows with a missing value in any feature column or in *target* are
    dropped. When *bounds* is given, only rows whose target value lies
    strictly between the two limits are kept. The label, the column list and
    the frame shape at each stage are printed so the number of rows removed
    by every step can be followed on the console.

    Args:
        frame: DataFrame containing the feature columns and *target*.
        feature_columns: List of feature column names.
        target: Name of the target column.
        label: Heading printed before the column list and shapes.
        bounds: Optional ``(low, high)`` pair of exclusive limits for the
            target value; ``None`` disables the range filter.

    Returns:
        The filtered DataFrame; it keeps every column of *frame*.

    Raises:
        KeyError: If a requested column is missing from *frame*
            (raised by ``DataFrame.dropna``).
    """
    print(label)
    required_columns = feature_columns + [target]
    print(required_columns)
    print(frame.shape)

    selected = frame.dropna(subset=required_columns)
    print(selected.shape)

    if bounds is not None:
        low, high = bounds
        # Both limits are exclusive: rows equal to a limit are removed too.
        selected = selected[(selected[target] > low) & (selected[target] < high)]
    # The shape is printed even when no range filter runs, so every target
    # produces the same number of console lines.
    print(selected.shape)
    return selected


data = pd.read_excel(f"{DATA_DIR}\\副本535.xlsx", sheet_name='Sheet1')
print(data.columns)

# Feature columns ("featrue" is a misspelling of "feature"; the name is kept
# so existing references to it keep working).
featrue_columns = ['A', 'V', 'FC', 'C', 'H', 'N', 'S', 'O', 'H/C', 'O/C', 'N/C', 'Rt', 'Hr', 'dp', 'T']

# Target columns; one dataset is produced for each of them below.
target_columns = ['Tar', 'Gas', 'Char', 'Water']

# Drop rows that have a missing value in any feature column.
print(data.shape)
filtered_data = data.dropna(subset=featrue_columns)
print(filtered_data.shape)

# Tar dataset: keep rows with 2 < Tar < 20.
tar_data = _select_target_rows(
    filtered_data, featrue_columns, 'Tar', "焦油数据集处理", bounds=(2, 20)
)

# Gas dataset: keep rows with 2 < Gas < 30.
Gas_data = _select_target_rows(
    filtered_data, featrue_columns, 'Gas', "煤气数据集处理", bounds=(2, 30)
)

# Char dataset: no range filter is applied (a former 2 < Char < 30 filter
# was disabled), so only rows with missing values are removed.
Char_data = _select_target_rows(
    filtered_data, featrue_columns, 'Char', "炭渣数据集处理"
)

# Water dataset: keep rows with 0.5 < Water < 50.
Water_data = _select_target_rows(
    filtered_data, featrue_columns, 'Water', "蒸汽数据集处理", bounds=(0.5, 50)
)

tar_data.to_csv(f"{DATA_DIR}\\tar_data.csv", index=False)
# NOTE(review): "gar_data.csv" looks like a typo for "gas_data.csv"; the name
# is left unchanged because downstream code may already read this file.
Gas_data.to_csv(f"{DATA_DIR}\\gar_data.csv", index=False)
Char_data.to_csv(f"{DATA_DIR}\\char_data.csv", index=False)
Water_data.to_csv(f"{DATA_DIR}\\water_data.csv", index=False)
|