import pandas as pd
from fuzzywuzzy import fuzz

# Input workbooks: the plant list to enrich and the licence register to
# look matches up in, plus the workbook the enriched list is written to.
PLANT_LIST_PATH = r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省.xlsx'
LICENCE_PATH = r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_许可.xlsx'
OUTPUT_PATH = r'C:\Users\97532\Desktop\陕西、福建_陈晓东\福建省_填充后_模糊匹配.xlsx'

# Columns copied from the matched licence row into the plant list.
COPIED_COLUMNS = ['单位名称', '发电许可证编号', '电厂名称/项目名称', '机组类型']


def _strip_header(label):
    """Return *label* without surrounding whitespace; non-strings pass through."""
    return label.strip() if isinstance(label, str) else label


def fuzzy_match(row, df2, min_score=0, capacity_tol=0.01):
    """Return the row of *df2* that best matches *row*, or ``None``.

    A row of *df2* is a candidate only when its ``总装机容量`` differs from
    ``row['发电装机容量']`` by less than *capacity_tol*. Among the candidates,
    the one whose ``电厂名称/项目名称`` has the highest
    ``fuzz.partial_ratio`` against ``row['电厂名称']`` wins; ties keep the
    earliest row.

    Args:
        row: A record exposing ``电厂名称`` and ``发电装机容量``.
        df2: DataFrame with ``电厂名称/项目名称`` and ``总装机容量`` columns.
        min_score: A candidate must score strictly above this to be accepted.
        capacity_tol: Maximum (exclusive) absolute capacity difference.

    Returns:
        The winning row of *df2* as a Series, or ``None`` when no candidate
        scores above *min_score*.
    """
    # Apply the cheap numeric filter first so the string scorer only runs on
    # rows that could be accepted at all. NaN capacities compare False here
    # and are therefore never candidates.
    capacity_gap = (df2['总装机容量'] - row['发电装机容量']).abs()
    candidates = df2[capacity_gap < capacity_tol]

    best_match = None
    highest_score = min_score
    for _, candidate in candidates.iterrows():
        score = fuzz.partial_ratio(row['电厂名称'], candidate['电厂名称/项目名称'])
        if score > highest_score:
            highest_score = score
            best_match = candidate
    return best_match


# Load both workbooks.
df1 = pd.read_excel(PLANT_LIST_PATH)
df2 = pd.read_excel(LICENCE_PATH)

# Show the raw licence headers, then strip stray whitespace from the headers
# of both frames so the column lookups below do not fail on padded names.
print("原始列名:", df2.columns)
df1 = df1.rename(columns=_strip_header)
df2 = df2.rename(columns=_strip_header)

# Names are compared as text and capacities as floats.
df1['电厂名称'] = df1['电厂名称'].astype(str)
df2['电厂名称/项目名称'] = df2['电厂名称/项目名称'].astype(str)
df1['发电装机容量'] = df1['发电装机容量'].astype(float)
df2['总装机容量'] = df2['总装机容量'].astype(float)

# For every plant, copy the licence details of its best match (if any).
for index1, row1 in df1.iterrows():
    match_row = fuzzy_match(row1, df2)
    if match_row is None:
        continue
    for column in COPIED_COLUMNS:
        df1.at[index1, column] = match_row[column]

# Write the enriched plant list to a new workbook.
df1.to_excel(OUTPUT_PATH, index=False)
print("数据填充已保存")