189 KiB
189 KiB
- 首先生成几个针对question.txt的结果,每行一个,存储在txt中;
- 选择哪几个模型?
- GPT系列
- GLM3
- 百度
- Qwen1.5-72b-chat
https://github.com/yuyouyu32/LLMQAEvaluate?tab=readme-ov-file
In [1]:
import numpy as np import pandas as pd from transformers import AutoTokenizer,AutoModel import torch from sklearn.metrics.pairwise import cosine_similarity from sentence_transformers import SentenceTransformer import pandas as pd import os device = "cuda" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = "1,0"
[2025-01-08 16:01:01,433] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
In [2]:
model_name = "/home/zhangxj/models/acge_text_embedding" tokenizer = AutoTokenizer.from_pretrained(model_name) # model = AutoModel.from_pretrained(model_name) model = SentenceTransformer(model_name)
In [3]:
def embedding(text): # inputs = tokenizer(text,return_tenors="pt",padding=True,truncation=True,max_length=512) # with torch.no_grad(): # outputs = model(**inputs) # embeddings = outputs.last_hidden_state.mean(dim=1) embeddings = model.encode(text,normalize_embeddings=True) return embeddings emb1 = embedding(["你好,这里是中国","欢迎你来到中国!"]) # from numpy.linalg import norm # cos_sim = lambda a,b:(a@b.T)/(norm(a)*norm(b)) print(emb1.shape)
(2, 1792)
In [4]:
import re def remove_punctuation(text): # 正则表达式匹配中文标点和英文标点 pstr = r""#$&'()*+,-/:;@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·!?。。" return re.sub(pstr, ' ', text) def get_ans_list(file_path): answers = [] with open(file_path,"r",encoding="utf-8") as file: for line in file.readlines(): answers.append(line.strip()) results = [remove_punctuation(ans) for ans in answers] return results
In [5]:
answers = get_ans_list("/home/zhangxj/WorkFile/LCA-GPT/QA/filters/answers.txt") answer_rag = get_ans_list("/home/zhangxj/WorkFile/LCA-GPT/QA/eval/RAGpred.txt") answer_qwen72 = get_ans_list("/home/zhangxj/WorkFile/LCA-GPT/QA/eval/Qwen72bpred.txt") answer_glm = get_ans_list("/home/zhangxj/WorkFile/LCA-GPT/QA/eval/GLMpred.txt") answer_baidu = get_ans_list("/home/zhangxj/WorkFile/LCA-GPT/QA/eval/ERNIEpred.txt")
In [6]:
# 获取每个类别的答案和文本 data_class = pd.read_excel("/home/zhangxj/WorkFile/LCA-GPT/QA/classify_new.xlsx") data_class.head()
Out[6]:
question | answer | 子类别 | |
---|---|---|---|
0 | 什么是生命周期分析(LCA)的主要目标? | 生命周期分析旨在评估产品或服务从原材料获取到最终处置的环境影响。 | LCA理论与相关知识 |
1 | 在LCA中,如何确定研究的范围? | 研究范围包括定义系统边界,如输入、输出、功能单位和分析阶段。 | 研究和试验发展 |
2 | 文档中提到的医疗废物如何处理? | 文档未直接说明医疗废物的具体处理方法,只提及了与之相关的能源消耗。 | 卫生和社会工作 |
3 | LCA数据清单收集阶段需要哪些信息? | 数据清单需收集所有过程的输入输出数据,包括资源消耗、排放和能源使用。 | LCA理论与相关知识 |
4 | 生命周期影响评价阶段的目标是什么? | 该阶段旨在量化每个阶段对环境的各种影响,如气候变化、水耗和土地使用。 | 生态保护和环境治理业 |
In [7]:
def classify_ans(answer,class_name): '''从全部的答案列表中筛选出目标类别的答案''' ans_class = [] for idx,item in data_class.iterrows(): if item['子类别'] == class_name: ans_class.append(answer[idx]) return ans_class
In [8]:
# 字典存储分类别的内容 ans_gold = dict() ans_rag = dict() ans_qwen72 = dict() ans_glm = dict() ans_baidu = dict() class_top10 = ["LCA理论与相关知识","生态保护和环境治理业","研究和试验发展","建筑业","非金属矿物制品业","化学原料和化学制品制造业","废弃资源综合利用业","农、林、牧、渔业","电力、热力生产和供应业","汽车制造业"] for clas in class_top10: ans_gold[clas] = classify_ans(answers,clas) ans_rag[clas] = classify_ans(answer_rag,clas) ans_qwen72[clas] = classify_ans(answer_qwen72,clas) ans_glm[clas] = classify_ans(answer_glm,clas) ans_baidu[clas] = classify_ans(answer_baidu,clas) print(len(ans_gold[clas])) print(len(ans_glm[clas]))
790 790 754 754 321 321 295 295 183 183 174 174 157 157 131 131 126 126 87 87
In [9]:
# 以字典形式获取每个类别的文本向量 emb_ans = dict() emb_rag = dict() emb_qwen72 = dict() emb_glm = dict() emb_baidu = dict() for clas in class_top10: emb_ans[clas] = embedding(ans_gold[clas]) emb_rag[clas] = embedding(ans_rag[clas]) emb_qwen72[clas] = embedding(ans_qwen72[clas]) emb_glm[clas] = embedding(ans_glm[clas]) emb_baidu[clas] = embedding(ans_baidu[clas])
In [10]:
### 指标1:余弦相似度,计算所有回答的cos_sim,取平均 def cos_sim(target,pred): ''' ans,pred的数据格式是numpy.narray''' cos_sim_list = [] for i in range(target.shape[0]): dot_product = np.dot(target[i],pred[i]) norm_target = np.linalg.norm(target[i]) norm_pred = np.linalg.norm(target[i]) cos = dot_product/(norm_target*norm_pred) cos_sim_list.append(cos) avg_cos_sim = np.mean(cos_sim_list) return avg_cos_sim
In [11]:
class_cos = dict() for clas in class_top10: print("类别:",clas) cos_dict = dict() cos_dict['LCA-GPT'] = cos_sim(emb_ans[clas],emb_rag[clas]) cos_dict['GLM-4'] = cos_sim(emb_ans[clas],emb_glm[clas]) cos_dict['ERNIE-3.5-8K'] = cos_sim(emb_ans[clas],emb_baidu[clas]) cos_dict['Qwen1.5-72b'] = cos_sim(emb_ans[clas],emb_qwen72[clas]) class_cos[clas] = cos_dict print(cos_dict)
类别: LCA理论与相关知识 {'LCA-GPT': 0.7563889, 'GLM-4': 0.72979796, 'ERNIE-3.5-8K': 0.73387533, 'Qwen1.5-72b': 0.7237778} 类别: 生态保护和环境治理业 {'LCA-GPT': 0.76573056, 'GLM-4': 0.7285658, 'ERNIE-3.5-8K': 0.73159605, 'Qwen1.5-72b': 0.7110586} 类别: 研究和试验发展 {'LCA-GPT': 0.74416304, 'GLM-4': 0.706629, 'ERNIE-3.5-8K': 0.6975246, 'Qwen1.5-72b': 0.68896115} 类别: 建筑业 {'LCA-GPT': 0.76548576, 'GLM-4': 0.7384504, 'ERNIE-3.5-8K': 0.7213398, 'Qwen1.5-72b': 0.71236473} 类别: 非金属矿物制品业 {'LCA-GPT': 0.7918446, 'GLM-4': 0.74560964, 'ERNIE-3.5-8K': 0.7342301, 'Qwen1.5-72b': 0.7142454} 类别: 化学原料和化学制品制造业 {'LCA-GPT': 0.80663353, 'GLM-4': 0.75003314, 'ERNIE-3.5-8K': 0.73770964, 'Qwen1.5-72b': 0.72282004} 类别: 废弃资源综合利用业 {'LCA-GPT': 0.78028744, 'GLM-4': 0.73390573, 'ERNIE-3.5-8K': 0.7344841, 'Qwen1.5-72b': 0.716429} 类别: 农、林、牧、渔业 {'LCA-GPT': 0.7865737, 'GLM-4': 0.7173627, 'ERNIE-3.5-8K': 0.7319357, 'Qwen1.5-72b': 0.69628036} 类别: 电力、热力生产和供应业 {'LCA-GPT': 0.7891359, 'GLM-4': 0.7410214, 'ERNIE-3.5-8K': 0.7426608, 'Qwen1.5-72b': 0.7154095} 类别: 汽车制造业 {'LCA-GPT': 0.7961327, 'GLM-4': 0.7537849, 'ERNIE-3.5-8K': 0.7584841, 'Qwen1.5-72b': 0.72462904}
In [18]:
df_cos = pd.DataFrame.from_dict(class_cos,orient='index').T df_cos.to_csv("/home/zhangxj/WorkFile/LCA-GPT/LCA_RAG/data/eval/cos.csv",index=False,encoding="utf-8")
In [19]:
### f1值 import jieba import collections def cal_f1(target,pred): target_token = list(jieba.cut(target,cut_all=False)) pred_token = list(jieba.cut(pred,cut_all=False)) common = collections.Counter(target_token) & collections.Counter(pred_token) num_same = sum(common.values()) if len(target_token) == 0 or len(pred_token) == 0: return int(target_token == pred_token) if num_same == 0: return 0 precision = 1.0*num_same/len(pred_token) recall = 1.0*num_same/len(target_token) f1 = (2.0*recall*precision) /(precision+recall) return f1 def calf1_all(target,pred): f1s = [] for tar,pre in zip(target,pred): f1 = cal_f1(tar,pre) f1s.append(f1) return np.mean(f1s)
In [20]:
class_f1 = dict() for clas in class_top10: print("类别:",clas) f1_dict = dict() f1_dict['LCA-GPT'] = calf1_all(ans_gold[clas],ans_rag[clas]) f1_dict['GLM-4'] = calf1_all(ans_gold[clas],ans_glm[clas]) f1_dict['ERNIE-3.5-8K'] = calf1_all(ans_gold[clas],ans_baidu[clas]) f1_dict['Qwen1.5-72b'] = calf1_all(ans_gold[clas],ans_qwen72[clas]) class_f1[clas] = f1_dict print(f1_dict)
Building prefix dict from the default dictionary ...
类别: LCA理论与相关知识
Dumping model to file cache /tmp/jieba.cache Loading model cost 0.679 seconds. Prefix dict has been built successfully.
{'LCA-GPT': 0.3437605081212203, 'GLM-4': 0.30454084953051963, 'ERNIE-3.5-8K': 0.30268024101560764, 'Qwen1.5-72b': 0.2682497767547755} 类别: 生态保护和环境治理业 {'LCA-GPT': 0.3483626691289569, 'GLM-4': 0.2788926668662055, 'ERNIE-3.5-8K': 0.27761325010853644, 'Qwen1.5-72b': 0.2302952772337834} 类别: 研究和试验发展 {'LCA-GPT': 0.3430343943468575, 'GLM-4': 0.2821541495127059, 'ERNIE-3.5-8K': 0.2729394816189532, 'Qwen1.5-72b': 0.23015648181836218} 类别: 建筑业 {'LCA-GPT': 0.39371906828313225, 'GLM-4': 0.31389786788066487, 'ERNIE-3.5-8K': 0.28676175803618814, 'Qwen1.5-72b': 0.2204392856411433} 类别: 非金属矿物制品业 {'LCA-GPT': 0.4130349692057703, 'GLM-4': 0.297081206460411, 'ERNIE-3.5-8K': 0.25563488130229556, 'Qwen1.5-72b': 0.21248638601796016} 类别: 化学原料和化学制品制造业 {'LCA-GPT': 0.4185145863497029, 'GLM-4': 0.2931829455107586, 'ERNIE-3.5-8K': 0.2694429593634037, 'Qwen1.5-72b': 0.22850507493329048} 类别: 废弃资源综合利用业 {'LCA-GPT': 0.38042435656104256, 'GLM-4': 0.28198554328062225, 'ERNIE-3.5-8K': 0.2718793531798568, 'Qwen1.5-72b': 0.2066845950860928} 类别: 农、林、牧、渔业 {'LCA-GPT': 0.39036740247556595, 'GLM-4': 0.27194804107827547, 'ERNIE-3.5-8K': 0.27189044583195743, 'Qwen1.5-72b': 0.20235451922007622} 类别: 电力、热力生产和供应业 {'LCA-GPT': 0.41114581401949746, 'GLM-4': 0.26988029172611805, 'ERNIE-3.5-8K': 0.2713352793043654, 'Qwen1.5-72b': 0.18898670597783235} 类别: 汽车制造业 {'LCA-GPT': 0.4130216859020192, 'GLM-4': 0.3179324748633672, 'ERNIE-3.5-8K': 0.3088808476140809, 'Qwen1.5-72b': 0.2327820200502282}
In [21]:
df_f1 = pd.DataFrame.from_dict(class_f1,orient='index').T df_f1.to_csv("/home/zhangxj/WorkFile/LCA-GPT/LCA_RAG/data/eval/f1.csv",index=False,encoding="utf-8")
In [22]:
### 指标2:BLEU 支持中文? import nltk from nltk.translate.bleu_score import sentence_bleu,SmoothingFunction import jieba ''' 到时候需要遍历整个文档的每一行进行计算,之后统计平均值''' def Recall(target,pred): ''' 直接传入文本格式的答案和预测结果''' # 文本分解为句子 target_list = list(target) pred_list = list(pred) # print(target_list) smooth = SmoothingFunction() # 计算bleu score = sentence_bleu([target_list],pred_list,smoothing_function=smooth.method2) return score def bleu_mean(target,pred): ''' 列表''' bleu = [] for tar,pre in zip(target,pred): recall = Recall(tar,pre) bleu.append(recall) return np.mean(bleu)
In [23]:
for clas in class_top10: print("类别:",clas) bleu_dict = dict() bleu_dict['LCA-GPT'] = bleu_mean(ans_gold[clas],ans_rag[clas]) bleu_dict['GLM-4'] = bleu_mean(ans_gold[clas],ans_glm[clas]) bleu_dict['ERNIE-3.5-8K'] = bleu_mean(ans_gold[clas],ans_baidu[clas]) bleu_dict['Qwen1.5-72b'] = bleu_mean(ans_gold[clas],ans_qwen72[clas]) print(bleu_dict)
类别: LCA理论与相关知识 {'LCA-GPT': 0.1805466558991861, 'GLM-4': 0.11554456962082649, 'ERNIE-3.5-8K': 0.12240250499083317, 'Qwen1.5-72b': 0.10443194919613479} 类别: 生态保护和环境治理业 {'LCA-GPT': 0.1936086630791969, 'GLM-4': 0.10723241319036586, 'ERNIE-3.5-8K': 0.11146362502253516, 'Qwen1.5-72b': 0.08702864761847347} 类别: 研究和试验发展 {'LCA-GPT': 0.18237757004342872, 'GLM-4': 0.10627954683010601, 'ERNIE-3.5-8K': 0.1053829450454662, 'Qwen1.5-72b': 0.08501998659280752} 类别: 建筑业 {'LCA-GPT': 0.22567381216712698, 'GLM-4': 0.1310207093784512, 'ERNIE-3.5-8K': 0.11733150982192744, 'Qwen1.5-72b': 0.08621213867998642} 类别: 非金属矿物制品业 {'LCA-GPT': 0.26680615981340805, 'GLM-4': 0.13232836478838658, 'ERNIE-3.5-8K': 0.10948499320594085, 'Qwen1.5-72b': 0.08474905129676888} 类别: 化学原料和化学制品制造业 {'LCA-GPT': 0.26724369675199966, 'GLM-4': 0.12509640511891365, 'ERNIE-3.5-8K': 0.11269336039381148, 'Qwen1.5-72b': 0.09118967633955695} 类别: 废弃资源综合利用业 {'LCA-GPT': 0.22468697711112326, 'GLM-4': 0.11250649617963052, 'ERNIE-3.5-8K': 0.11120199316111042, 'Qwen1.5-72b': 0.07802856134904246} 类别: 农、林、牧、渔业 {'LCA-GPT': 0.2464707549734943, 'GLM-4': 0.10604667409601695, 'ERNIE-3.5-8K': 0.1091459446440835, 'Qwen1.5-72b': 0.07117894187328233} 类别: 电力、热力生产和供应业 {'LCA-GPT': 0.25818318417049985, 'GLM-4': 0.11724440564237175, 'ERNIE-3.5-8K': 0.12456420117921357, 'Qwen1.5-72b': 0.07637114311250687} 类别: 汽车制造业 {'LCA-GPT': 0.254561556379476, 'GLM-4': 0.1415682454400737, 'ERNIE-3.5-8K': 0.14866059993668507, 'Qwen1.5-72b': 0.09549466792936462}
In [25]:
### 指标3:Rouge-l,Rouge-w from rouge_chinese import Rouge import jieba def calRouge(target,pred): ''' 传入的是文档列表,越大越好''' f = 0.0 p = 0.0 r = 0.0 for targ,pre in zip(target,pred): target_cut = ' '.join(jieba.cut(targ,cut_all=False)) pred_cut = ' '.join(jieba.cut(pre,cut_all=False)) rouger = Rouge() scores = rouger.get_scores(pred_cut,target_cut) rougeL = scores[0]['rouge-l'] f += rougeL['f'] p += rougeL['p'] r += rougeL['r'] length = len(answer_rag) return f/length,p/length,r/length
In [26]:
## 指标3:rouge from rouge_score import rouge_scorer def rouge(predict, target): scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True) # Calculate the ROUGE score score = scorer.score(predict, target) # Extract the F1 score for ROUGE-1 rouge_score = score['rougeL'].fmeasure return rouge_score def rouge_all(target,pred): rouges = [] for tar,pre in zip(target,pred): score = rouge(pre,tar) rouges.append(score) return np.mean(rouges)
In [27]:
class_roug = dict() for clas in class_top10: print("类别:",clas) rou_dict = dict() rou_dict['LCA-GPT'] = rouge_all(ans_gold[clas],ans_rag[clas]) rou_dict['GLM-4'] = rouge_all(ans_gold[clas],ans_glm[clas]) rou_dict['ERNIE-3.5-8K'] = rouge_all(ans_gold[clas],ans_baidu[clas]) rou_dict['Qwen1.5-72b'] = rouge_all(ans_gold[clas],ans_qwen72[clas]) class_roug[clas] = rou_dict print(rou_dict)
类别: LCA理论与相关知识 {'LCA-GPT': 0.39498794454490654, 'GLM-4': 0.2213504786974626, 'ERNIE-3.5-8K': 0.30471539483857785, 'Qwen1.5-72b': 0.2724725275577533} 类别: 生态保护和环境治理业 {'LCA-GPT': 0.33244458401218807, 'GLM-4': 0.1579393502684968, 'ERNIE-3.5-8K': 0.19627007847801298, 'Qwen1.5-72b': 0.1618331729529683} 类别: 研究和试验发展 {'LCA-GPT': 0.32609547773099173, 'GLM-4': 0.17042631995903024, 'ERNIE-3.5-8K': 0.20333817273069607, 'Qwen1.5-72b': 0.1632025416542098} 类别: 建筑业 {'LCA-GPT': 0.29661873962970675, 'GLM-4': 0.11762091016328305, 'ERNIE-3.5-8K': 0.1457537942283705, 'Qwen1.5-72b': 0.09701429611918969} 类别: 非金属矿物制品业 {'LCA-GPT': 0.40538367218695087, 'GLM-4': 0.13737843819811033, 'ERNIE-3.5-8K': 0.16863152027086453, 'Qwen1.5-72b': 0.11709287878162374} 类别: 化学原料和化学制品制造业 {'LCA-GPT': 0.3627170721998308, 'GLM-4': 0.08316912972085386, 'ERNIE-3.5-8K': 0.13428285147352084, 'Qwen1.5-72b': 0.08392393366531298} 类别: 废弃资源综合利用业 {'LCA-GPT': 0.2940372790691262, 'GLM-4': 0.10792134263471842, 'ERNIE-3.5-8K': 0.13910282923021774, 'Qwen1.5-72b': 0.10455685503456204} 类别: 农、林、牧、渔业 {'LCA-GPT': 0.348491457651763, 'GLM-4': 0.1357506361323155, 'ERNIE-3.5-8K': 0.17828388744419277, 'Qwen1.5-72b': 0.131476254623987} 类别: 电力、热力生产和供应业 {'LCA-GPT': 0.32386475957904526, 'GLM-4': 0.06020408163265306, 'ERNIE-3.5-8K': 0.10913031879418433, 'Qwen1.5-72b': 0.0647123664280527} 类别: 汽车制造业 {'LCA-GPT': 0.3455275567344533, 'GLM-4': 0.09315818281335522, 'ERNIE-3.5-8K': 0.1705906011713634, 'Qwen1.5-72b': 0.12014164935057432}
In [28]:
df_rouge = pd.DataFrame.from_dict(class_roug,orient='index').T df_rouge.to_csv("/home/zhangxj/WorkFile/LCA-GPT/LCA_RAG/data/eval/rouge.csv",index=False,encoding="utf-8")
In [29]:
## 计算BERTscore from transformers import BertTokenizer, BertModel tokenizer = BertTokenizer.from_pretrained("/home/zhangxj/models/bert/bert-base-chinese") model2 = BertModel.from_pretrained("/home/zhangxj/models/bert/bert-base-chinese") def cosine_similarity(a, b): return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) def bert_score(reference, candidate, return_similarity_matrix=False): # 计算余弦相似度 cosine_similarities = np.zeros((reference.shape[0], candidate.shape[0])) for i, c in enumerate(candidate): for j, r in enumerate(reference): cosine_similarities[i, j] = cosine_similarity(c, r) # 取每一行数据的最大余弦相似度 max_similarities = cosine_similarities.max(axis=1) # 取所有余弦相似度的均值 bertscore = max_similarities.mean() if return_similarity_matrix: return bertscore, cosine_similarities else: return bertscore
/home/zhangxj/miniconda3/envs/Qwen/lib/python3.10/site-packages/torch/_utils.py:831: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage() return self.fget.__get__(instance, owner)()
In [30]:
class_bert = dict() for clas in class_top10: print("类别:",clas) bert_dict = dict() bert_dict['LCA-GPT'] = bert_score(emb_ans[clas],emb_rag[clas]) bert_dict['GLM-4'] = bert_score(emb_ans[clas],emb_glm[clas]) bert_dict['ERNIE-3.5-8K'] = bert_score(emb_ans[clas],emb_baidu[clas]) bert_dict['Qwen1.5-72b'] = bert_score(emb_ans[clas],emb_qwen72[clas]) class_bert[clas] = bert_dict print(bert_dict)
类别: LCA理论与相关知识 {'LCA-GPT': 0.8285459900958628, 'GLM-4': 0.8031236508979073, 'ERNIE-3.5-8K': 0.8069283152682871, 'Qwen1.5-72b': 0.80836640302139} 类别: 生态保护和环境治理业 {'LCA-GPT': 0.8278908556430978, 'GLM-4': 0.7980815338993579, 'ERNIE-3.5-8K': 0.7964145340400919, 'Qwen1.5-72b': 0.7882782395543724} 类别: 研究和试验发展 {'LCA-GPT': 0.8031791591570013, 'GLM-4': 0.762518234991953, 'ERNIE-3.5-8K': 0.7619536520907441, 'Qwen1.5-72b': 0.757611659642692} 类别: 建筑业 {'LCA-GPT': 0.8203105013249284, 'GLM-4': 0.7842578558598534, 'ERNIE-3.5-8K': 0.7759484621427827, 'Qwen1.5-72b': 0.7679492297819105} 类别: 非金属矿物制品业 {'LCA-GPT': 0.825442724214877, 'GLM-4': 0.7794850773172952, 'ERNIE-3.5-8K': 0.7705819658894356, 'Qwen1.5-72b': 0.760956196511378} 类别: 化学原料和化学制品制造业 {'LCA-GPT': 0.8266749505338997, 'GLM-4': 0.7717763445843225, 'ERNIE-3.5-8K': 0.764714789459075, 'Qwen1.5-72b': 0.7494578895897701} 类别: 废弃资源综合利用业 {'LCA-GPT': 0.8150948574588557, 'GLM-4': 0.761772907653432, 'ERNIE-3.5-8K': 0.7655670039213387, 'Qwen1.5-72b': 0.7485851030440847} 类别: 农、林、牧、渔业 {'LCA-GPT': 0.8184339215282266, 'GLM-4': 0.7447150918363615, 'ERNIE-3.5-8K': 0.75926776605708, 'Qwen1.5-72b': 0.7276846668647445} 类别: 电力、热力生产和供应业 {'LCA-GPT': 0.811075790060891, 'GLM-4': 0.7617944805395036, 'ERNIE-3.5-8K': 0.769578997104887, 'Qwen1.5-72b': 0.7415060230663845} 类别: 汽车制造业 {'LCA-GPT': 0.8514739521618547, 'GLM-4': 0.798203267585272, 'ERNIE-3.5-8K': 0.8130694065970936, 'Qwen1.5-72b': 0.7754989156777832}
In [31]:
df_bert = pd.DataFrame.from_dict(class_bert,orient='index').T df_bert.to_csv("/home/zhangxj/WorkFile/LCA-GPT/LCA_RAG/data/eval/bert.csv",index=False,encoding="utf-8")
In [34]:
# 创建一个字典,将中文标签映射为英文 # x_labels = { # "LCA理论与相关知识": "LCA Theory and Related Knowledge", # "生态保护和环境治理业": "Ecological Protection and Environmental Management", # "研究和试验发展": "Research and Experimental Development", # "建筑业": "Construction Industry", # "非金属矿物制品业": "Non-metallic Mineral Products Industry", # "化学原料和化学制品制造业": "Chemical Raw Materials and Products Manufacturing", # "废弃资源综合利用业": "Waste Resource Recycling Industry", # "农、林、牧、渔业": "Agriculture, Forestry, Animal Husbandry, and Fishery", # "电力、热力生产和供应业": "Electricity and Heat Production and Supply", # "汽车制造业": "Automobile Manufacturing Industry" # } x_labels_abbr = { "LCA理论与相关知识": "LCA Theory", "生态保护和环境治理业": "Ecological Protection", "研究和试验发展": "R&D", "建筑业": "Construction", "非金属矿物制品业": "Non-metallic Products", "化学原料和化学制品制造业": "Chemicals Manufacturing", "废弃资源综合利用业": "Waste Recycling", "农、林、牧、渔业": "Agriculture & Fisheries", "电力、热力生产和供应业": "Energy Production", "汽车制造业": "Automobile Manufacturing" } category_scores_list = [class_cos,class_bert,class_f1,class_roug]
In [35]:
import matplotlib.pyplot as plt import numpy as np plt.rcParams['font.sans-serif'] = ['Times New Roman'] plt.rcParams['axes.unicode_minus'] = False plt.rcParams['font.size'] = 14 # 设置全局字体大小为14 title_list = ["Similarity", "BERTScore", "F-1", "Rouge"] # 定义颜色 colors_list = [ ['#AEEEEE', '#FFB3BA', '#FFDFD3', '#D6EAF8'], # 第一子图的颜色 ['#D5F5E3', '#FAD7A0', '#A9DFBF', '#D7BDE2'], # 第二子图的颜色 ['#F5C1C1', '#D2F5A9', '#A9D2F5', '#F6D8C1'], # 第三子图的颜色 ['#A3E4D7', '#F7DC6F', '#F5B7B1', '#A9F5BC'] # 第四子图的颜色 ] # 设置图形布局为4 行 1 列 fig, axs = plt.subplots(4, 1, figsize=(12, 10)) # 遍历每个category_scores字典并绘制柱状图 for idx, category_scores in enumerate(category_scores_list): ax = axs[idx] # 确定子图的位置 categories = list(category_scores.keys()) categories_english = [x_labels_abbr[category] for category in categories] models = list(next(iter(category_scores.values())).keys()) values = [list(category_scores[category].values()) for category in categories] colors = colors_list[idx] for i, model in enumerate(models): ax.bar(np.arange(len(categories)) + i * 0.2, [row[i] for row in values], 0.2, label=model, color=colors[i]) # ax.set_title(title_list[idx], fontsize=18) # 设置标题字体大小 ax.set_ylabel(title_list[idx], fontsize=16) # 设置y轴标签字体大小 ax.legend(loc='upper left', bbox_to_anchor=(1, 1), borderaxespad=0., fontsize=12) # 设置图例字体大小 if idx == len(category_scores_list) - 1: ax.set_xticks(np.arange(len(categories)) + 0.3) ax.set_xticklabels(categories_english, rotation=30, ha='right', fontsize=14) # 设置x轴标签字体大小 else: ax.set_xticks([]) # 调整布局 plt.tight_layout() plt.show() plt.savefig('evaluate.png', dpi=300)
<Figure size 640x480 with 0 Axes>
In [ ]:
In [32]:
Out[32]:
name | number | |
---|---|---|
0 | LCA theory and related knowledge | 790 |
1 | Ecological protection and environmental govern... | 754 |
2 | Research and experimental development | 321 |
3 | Construction industry | 295 |
4 | Non-metallic mineral products industry | 183 |
In [5]:
import pandas as pd name = "rouge" df = pd.read_csv("/home/zhangxj/WorkFile/LCA-GPT/LCA_RAG/data/eval/"+name+".csv",encoding="utf-8") df.to_excel("/home/zhangxj/WorkFile/LCA-GPT/LCA_RAG/data/eval/"+name+".xlsx",index=False)
In [ ]: