import json
import os
import re
import time
from pprint import pprint

from qwen_agent.agents import Assistant

# A question runs from '#' up to the first question mark (full-width or
# ASCII); an answer runs from '@' up to the first full-width period.
_QUESTION_RE = re.compile(r'#([^#]*?[??])')
_ANSWER_RE = re.compile(r'@([^@]*?。)')


def extract_qa(s):
    """Extract question and answer strings from one model reply.

    Questions are the spans that follow a '#' and end at the first question
    mark ('?' or '?'); answers are the spans that follow a '@' and end at
    the first '。'. The leading marker is dropped and the closing punctuation
    is kept. Both lists are printed for inspection before being returned.

    Args:
        s: The reply text to scan.

    Returns:
        A ``(questions, answers)`` tuple of lists of strings. The two lists
        are matched independently, so their lengths may differ.
    """
    questions = _QUESTION_RE.findall(s)
    answers = _ANSWER_RE.findall(s)
    print("Q:", questions)
    print("A:", answers)
    return questions, answers


def filter_short_answers(questions, answers, min_length=8):
    """Drop question/answer pairs in which either side is too short.

    Questions and answers are paired positionally with ``zip``, so any
    surplus items in the longer list are ignored.

    Args:
        questions: Question strings.
        answers: Answer strings, aligned by position with ``questions``.
        min_length: Minimum number of characters that both the question and
            the answer must have for the pair to be kept.

    Returns:
        A ``(filtered_questions, filtered_answers)`` tuple of lists that are
        still aligned by position.
    """
    kept_pairs = [
        (question, answer)
        for question, answer in zip(questions, answers)
        if len(question) >= min_length and len(answer) >= min_length
    ]
    filtered_questions = [question for question, _ in kept_pairs]
    filtered_answers = [answer for _, answer in kept_pairs]
    return filtered_questions, filtered_answers


def write_to_file(filename, data_list):
    """Append every string in ``data_list`` to ``filename``, one per line.

    The file is opened in append mode with UTF-8 encoding, so existing
    content is preserved and the file is created if it does not exist.

    Args:
        filename: Path of the file to append to.
        data_list: Iterable of strings to write.
    """
    with open(filename, 'a', encoding='utf-8') as file:
        file.writelines(item + '\n' for item in data_list)


# Folder holding the source documents; every directory entry is processed.
data = "/home/zhangxj/WorkFile/LCA-GPT/split_LCAdata/folder4"
docs = os.listdir(data)

llm_cfg = {
    'model': 'qwen-plus',
    'model_server': 'dashscope',
    # Prefer the key from the environment. NOTE(review): the literal fallback
    # is a credential committed to source control; it is kept only so that
    # existing runs keep working and should be rotated and removed.
    'api_key': os.environ.get(
        'DASHSCOPE_API_KEY', "sk-c5f441f863f44094b0ddb96c831b5002"),
}

# The prompt text is sent to the model verbatim, so it is kept exactly as
# written rather than re-wrapped.
system_instruction = '''你是一位专注于生命周期分析(LCA)领域的数据分析助手。在LCA领域的目标和范围定义、数据清单收集和分析、生命周期影响评价、结果分析和政策建议等方面有着丰富的经验和知识。 请根据下面的文档提出10个问题及其相应的答案,规定每个问题的字符数量为x,答案的字符数量为y, "x>40 & x<70;y>40 & y<70" 10个question结果的输出为10个字符串,以"#问题1:"开头; 10个对应的answer结果输出为10个字符串,以"@答案1:"开头,答案以"。"结尾,不要换行,不要换行。 '''

tools = ['code_interpreter']  # `code_interpreter` is a built-in tool for executing code.
messages = []  # This stores the chat history.
# Running totals of every question/answer pair kept across all documents.
questions = []
answers = []

# The output paths and the user prompt do not change between documents, so
# they are defined once instead of on every iteration.
file1 = "/home/zhangxj/WorkFile/LCA-GPT/QA/ques.txt"
file2 = "/home/zhangxj/WorkFile/LCA-GPT/QA/answer.txt"
prompt = "分析这篇文章,并按照格式输出10个问题和相应的答案。"

# Process each document
for doc in docs:
    doc_path = os.path.join(data, doc)
    files = [doc_path]

    # Start each document with a fresh one-message history. Appending to a
    # shared list would resend one more copy of the same prompt with every
    # additional document.
    messages = [{'role': 'user', 'content': prompt}]

    assistant = Assistant(llm=llm_cfg,
                          system_message=system_instruction,
                          # function_list=tools,
                          files=files)

    # Drain the generator; only the last value it yields is used below.
    response = []
    for response in assistant.run(messages=messages):
        pass

    if response:
        # Newlines are removed so each extracted item is written as a single
        # line in the output files.
        content = response[0]['content'].replace('\n', '')
        print(content)

        question, answer = extract_qa(content)
        filterq, filtera = filter_short_answers(question, answer)
        questions.extend(filterq)
        answers.extend(filtera)

        # Persist after every document so finished work survives a later
        # failure.
        write_to_file(file1, filterq)
        write_to_file(file2, filtera)
    else:
        # Nothing was yielded for this document; skip it instead of failing
        # on response[0].
        print(f"No response received for {doc_path}; skipping.")

    # Pause for a while to avoid hitting API rate limits
    time.sleep(3)