building-agents/excel.py

41 lines
1.7 KiB
Python
Raw Normal View History

2024-11-22 10:03:31 +08:00
import pandas as pd
from fuzzywuzzy import fuzz
# Load the two workbooks: df1 is the sheet that gets extra columns filled in
# further down, df2 is the sheet those values are copied from.
df1 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省.xlsx')
df2 = pd.read_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_许可.xlsx')
# Print df2's headers as read from disk, then trim surrounding whitespace so
# the column lookups below use clean names.
print("原始列名:", df2.columns)
df2.columns = df2.columns.str.strip()
# Name columns are compared as text by the fuzzy matcher, so force them to str.
# NOTE(review): astype(str) renders missing cells as the literal text 'nan',
# which is also what ends up in the saved df1 - confirm that is acceptable.
for frame, column in ((df1, '电厂名称'), (df2, '电厂名称/项目名称')):
    frame[column] = frame[column].astype(str)
# Capacity columns are compared numerically, so force them to float.
for frame, column in ((df1, '发电装机容量'), (df2, '总装机容量')):
    frame[column] = frame[column].astype(float)
def fuzzy_match(row: pd.Series, df2: pd.DataFrame, capacity_tol: float = 0.01):
    """Return the df2 row that best matches *row*, or ``None`` if there is none.

    A df2 row is a candidate only when its ``'总装机容量'`` differs from
    ``row['发电装机容量']`` by less than *capacity_tol*. Among the candidates,
    the one whose ``'电厂名称/项目名称'`` has the highest
    ``fuzz.partial_ratio`` against ``row['电厂名称']`` wins.

    Args:
        row: Record providing ``'电厂名称'`` and ``'发电装机容量'``.
        df2: Frame providing ``'电厂名称/项目名称'`` and ``'总装机容量'``.
        capacity_tol: Exclusive upper bound on the absolute capacity
            difference for a df2 row to be considered.

    Returns:
        The winning df2 row as a Series, or ``None`` when no candidate
        scores above 0. On equal scores the earliest row in df2 is kept.
    """
    # Apply the cheap numeric filter first so the comparatively expensive
    # string scoring only runs on rows that could be accepted. Rows with a
    # missing capacity compare as False and are dropped, exactly as a
    # per-row ``abs(a - b) < tol`` check would do.
    capacity_gap = (row['发电装机容量'] - df2['总装机容量']).abs()
    candidates = df2.loc[capacity_gap < capacity_tol]

    best_match = None
    highest_score = 0
    source_name = row['电厂名称']
    for _, candidate in candidates.iterrows():
        score = fuzz.partial_ratio(source_name, candidate['电厂名称/项目名称'])
        # Strict '>' keeps the first of several equally good candidates and
        # never accepts a score of 0.
        if score > highest_score:
            highest_score = score
            best_match = candidate
    return best_match
# For every df1 row, look up its best df2 match and copy the matched row's
# descriptive columns across; rows without a match are left untouched.
for row_label, plant in df1.iterrows():
    matched = fuzzy_match(plant, df2)
    if matched is None:
        continue
    for column in ('单位名称', '发电许可证编号', '电厂名称/项目名称', '机组类型'):
        df1.at[row_label, column] = matched[column]
# Write the enriched df1 to a new workbook (without the index column).
df1.to_excel(r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_填充后_模糊匹配.xlsx', index=False)
print("数据填充已保存")