ai-station-code/meirejie/utils/data_prepare.py

79 lines
2.3 KiB
Python
Raw Permalink Normal View History

2025-05-06 11:18:48 +08:00
import pandas as pd
import numpy as np
import os
import sys
# Load the raw sheet. The path is an absolute, machine-specific Windows
# location, so the script only runs where this workbook exists.
data = pd.read_excel(
    "D:\\project\\ai_station\\meirejie\\data\\副本535.xlsx",
    sheet_name='Sheet1',
)
print(data.columns)

# Feature columns.
featrue_columns = [
    'A', 'V', 'FC', 'C', 'H', 'N', 'S', 'O',
    'H/C', 'O/C', 'N/C', 'Rt', 'Hr', 'dp', 'T',
]
# Target columns. NOTE: this name is rebound further down the script to
# "features + one target" for each per-target dataset.
target_columns = ['Tar', 'Gas', 'Char', 'Water']

# Drop every row that has a missing value in at least one feature column;
# the shape is printed before and after to show how many rows were removed.
print(data.shape)
filtered_data = data.loc[data[featrue_columns].notna().all(axis=1)]
print(filtered_data.shape)
# ---- Tar dataset ----
print("焦油数据集处理")
# Columns that must be non-null for this dataset: all features plus 'Tar'.
target_columns = [*featrue_columns, 'Tar']
print(target_columns)
print(filtered_data.shape)
tar_data = filtered_data.dropna(subset=target_columns)
print(tar_data.shape)
# Keep only rows with 2 < Tar < 20 (both bounds exclusive); everything
# outside that open interval is discarded.
tar_data = tar_data.loc[lambda rows: rows['Tar'].gt(2) & rows['Tar'].lt(20)]
print(tar_data.shape)
# ---- Gas dataset ----
print("煤气数据集处理")
# Columns that must be non-null for this dataset: all features plus 'Gas'.
target_columns = [*featrue_columns, 'Gas']
print(target_columns)
print(filtered_data.shape)
Gas_data = filtered_data.dropna(subset=target_columns)
print(Gas_data.shape)
# Keep only rows with 2 < Gas < 30 (both bounds exclusive); everything
# outside that open interval is discarded.
Gas_data = Gas_data.loc[lambda rows: rows['Gas'].gt(2) & rows['Gas'].lt(30)]
print(Gas_data.shape)
# ---- Char dataset ----
print("炭渣数据集处理")
# Columns that must be non-null for this dataset: all features plus 'Char'.
target_columns = [*featrue_columns, 'Char']
print(target_columns)
print(filtered_data.shape)
Char_data = filtered_data.dropna(subset=target_columns)
print(Char_data.shape)
# No value-range filter is applied to 'Char'. A "2 < Char < 30" filter used
# to sit here but is disabled, so the shape printed below is the same as the
# one printed just above.
print(Char_data.shape)
# ---- Water dataset ----
print("蒸汽数据集处理")
# Columns that must be non-null for this dataset: all features plus 'Water'.
target_columns = [*featrue_columns, 'Water']
print(target_columns)
print(filtered_data.shape)
Water_data = filtered_data.dropna(subset=target_columns)
print(Water_data.shape)
# Keep only rows with 0.5 < Water < 50 (both bounds exclusive); everything
# outside that open interval is discarded.
Water_data = Water_data.loc[
    lambda rows: rows['Water'].gt(0.5) & rows['Water'].lt(50)
]
print(Water_data.shape)
# Write each per-target dataset to its own CSV file (row index omitted).
# The files are written in this order: tar, gas, char, water.
# NOTE(review): 'gar_data.csv' may be a typo for 'gas_data.csv' -- confirm
# against whatever reads this file before renaming it.
for _frame, _csv_path in (
    (tar_data, 'D:\\project\\ai_station\\meirejie\\data\\tar_data.csv'),
    (Gas_data, 'D:\\project\\ai_station\\meirejie\\data\\gar_data.csv'),
    (Char_data, 'D:\\project\\ai_station\\meirejie\\data\\char_data.csv'),
    (Water_data, 'D:\\project\\ai_station\\meirejie\\data\\water_data.csv'),
):
    _frame.to_csv(_csv_path, index=False)