ai-station-code/meirejie/utils/data_prepare.py

79 lines
2.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import os
import sys
def _select_target_rows(frame, feature_cols, target, banner, bounds=None):
    """Return the rows of *frame* that are complete for one target column.

    Rows with a missing value in any of *feature_cols* or in *target* are
    dropped. When *bounds* is given as ``(lower, upper)``, only rows with
    ``lower < frame[target] < upper`` (both ends exclusive) are kept.

    Five progress lines are printed, in this order: the banner, the column
    list used for the NaN check, the input shape, the shape after dropping
    NaN rows, and the final shape. When *bounds* is ``None`` the last two
    lines are identical.

    Args:
        frame: DataFrame to filter.
        feature_cols: list of feature column names.
        target: name of the target column.
        banner: message printed before processing starts.
        bounds: optional ``(lower, upper)`` exclusive range for *target*;
            ``None`` disables the range filter.

    Returns:
        The filtered DataFrame.
    """
    columns = feature_cols + [target]
    print(banner)
    print(columns)
    print(frame.shape)
    subset = frame.dropna(subset=columns)
    print(subset.shape)
    if bounds is not None:
        lower, upper = bounds
        # Strict inequalities: rows equal to either bound are removed too.
        subset = subset[(subset[target] > lower) & (subset[target] < upper)]
    print(subset.shape)
    return subset


data = pd.read_excel("D:\\project\\ai_station\\meirejie\\data\\副本535.xlsx", sheet_name='Sheet1')
print(data.columns)

# Feature columns. The misspelled name ("featrue") is kept because it is a
# module-level name that other code may reference.
featrue_columns = ['A', 'V', 'FC', 'C', 'H', 'N', 'S', 'O', 'H/C', 'O/C', 'N/C', 'Rt', 'Hr', 'dp', 'T']

# Drop every row that has a missing value in any feature column.
print(data.shape)
filtered_data = data.dropna(subset=featrue_columns)
print(filtered_data.shape)

# 'Tar' dataset: keep rows with 2 < Tar < 20.
tar_data = _select_target_rows(
    filtered_data, featrue_columns, 'Tar', "焦油数据集处理", bounds=(2, 20)
)

# 'Gas' dataset: keep rows with 2 < Gas < 30.
Gas_data = _select_target_rows(
    filtered_data, featrue_columns, 'Gas', "煤气数据集处理", bounds=(2, 30)
)

# 'Char' dataset: no range filter is applied. The original script had a
# 2 < Char < 30 filter here, but it was commented out.
Char_data = _select_target_rows(
    filtered_data, featrue_columns, 'Char', "炭渣数据集处理"
)

# 'Water' dataset: keep rows with 0.5 < Water < 50.
Water_data = _select_target_rows(
    filtered_data, featrue_columns, 'Water', "蒸汽数据集处理", bounds=(0.5, 50)
)

# Kept for backward compatibility: the original script left this module-level
# name holding the column list of the last dataset processed (Water).
target_columns = featrue_columns + ['Water']

# Write each subset next to the source workbook, without the index column.
tar_data.to_csv('D:\\project\\ai_station\\meirejie\\data\\tar_data.csv', index=False)
# NOTE(review): "gar_data.csv" looks like a typo for "gas_data.csv"; left
# unchanged because other code may read this exact path. Confirm before renaming.
Gas_data.to_csv('D:\\project\\ai_station\\meirejie\\data\\gar_data.csv', index=False)
Char_data.to_csv('D:\\project\\ai_station\\meirejie\\data\\char_data.csv', index=False)
Water_data.to_csv('D:\\project\\ai_station\\meirejie\\data\\water_data.csv', index=False)