79 lines
2.3 KiB
Python
79 lines
2.3 KiB
Python
import pandas as pd
|
||
import numpy as np
|
||
import os
|
||
import sys
|
||
|
||
# Load sheet 'Sheet1' of the source workbook (hard-coded absolute Windows path).
data = pd.read_excel(
    "D:\\project\\ai_station\\meirejie\\data\\副本535.xlsx",
    sheet_name='Sheet1',
)
print(data.columns)

# Feature (input) columns. The identifier keeps its original spelling because
# the rest of the script refers to it by this name.
featrue_columns = [
    'A', 'V', 'FC', 'C', 'H', 'N', 'S', 'O',
    'H/C', 'O/C', 'N/C',
    'Rt', 'Hr', 'dp', 'T',
]

# Target (output) columns. Each per-target section further down rebinds
# `target_columns` to "features + one target" before using it.
target_columns = ['Tar', 'Gas', 'Char', 'Water']

# Drop every row that has a missing value in any feature column, printing the
# frame shape before and after so the number of removed rows is visible.
print(data.shape)
filtered_data = data.dropna(subset=featrue_columns)
print(filtered_data.shape)
|
||
|
||
|
||
|
||
# Tar dataset
print("焦油数据集处理")
target_columns = featrue_columns + ['Tar']
print(target_columns)
print(filtered_data.shape)

# Keep only rows that have a value in every feature column and in 'Tar'.
tar_data = filtered_data.dropna(subset=target_columns)
print(tar_data.shape)

# Keep rows with 2 < Tar < 20. Both bounds are exclusive, so rows equal to
# 2 or 20 are removed together with everything outside the range.
tar_values = tar_data['Tar']
tar_in_range = (tar_values > 2) & (tar_values < 20)
tar_data = tar_data[tar_in_range]
print(tar_data.shape)
|
||
|
||
|
||
# Gas dataset
print("煤气数据集处理")
target_columns = featrue_columns + ['Gas']
print(target_columns)
print(filtered_data.shape)

# Keep only rows that have a value in every feature column and in 'Gas'.
Gas_data = filtered_data.dropna(subset=target_columns)
print(Gas_data.shape)

# Keep rows with 2 < Gas < 30. Both bounds are exclusive, so rows equal to
# 2 or 30 are removed together with everything outside the range.
gas_values = Gas_data['Gas']
gas_in_range = (gas_values > 2) & (gas_values < 30)
Gas_data = Gas_data[gas_in_range]
print(Gas_data.shape)
|
||
|
||
|
||
# Char dataset
print("炭渣数据集处理")
target_columns = featrue_columns + ['Char']
print(target_columns)
print(filtered_data.shape)

# Keep only rows that have a value in every feature column and in 'Char'.
Char_data = filtered_data.dropna(subset=target_columns)
print(Char_data.shape)

# No range filter is applied to 'Char': the "2 < Char < 30" filter that the
# other targets have a counterpart of is disabled here, so the shape printed
# below is the same as the one printed just above.
print(Char_data.shape)
|
||
|
||
|
||
|
||
|
||
# Water dataset
print("蒸汽数据集处理")
target_columns = featrue_columns + ['Water']
print(target_columns)
print(filtered_data.shape)

# Keep only rows that have a value in every feature column and in 'Water'.
Water_data = filtered_data.dropna(subset=target_columns)
print(Water_data.shape)

# Keep rows with 0.5 < Water < 50. Both bounds are exclusive, so rows equal to
# 0.5 or 50 are removed together with everything outside the range.
water_values = Water_data['Water']
water_in_range = (water_values > 0.5) & (water_values < 50)
Water_data = Water_data[water_in_range]
print(Water_data.shape)
|
||
|
||
|
||
# Write each per-target dataset to its own CSV file, without the index column.
# NOTE(review): the gas file is named 'gar_data.csv', unlike the other three
# '<target>_data.csv' names. This may be a typo for 'gas_data.csv'; the path
# is left unchanged because other code may read this exact file. Confirm
# before renaming.
for frame, csv_path in (
    (tar_data, 'D:\\project\\ai_station\\meirejie\\data\\tar_data.csv'),
    (Gas_data, 'D:\\project\\ai_station\\meirejie\\data\\gar_data.csv'),
    (Char_data, 'D:\\project\\ai_station\\meirejie\\data\\char_data.csv'),
    (Water_data, 'D:\\project\\ai_station\\meirejie\\data\\water_data.csv'),
):
    frame.to_csv(csv_path, index=False)