# Source file: LCA-GPT/LCArag/QAdata.py
# (Python, 100 lines, 3.2 KiB; repository view dated 2024-07-30 10:56:08 +08:00)
import os
import time
from qwen_agent.agents import Assistant
from pprint import pprint
import json
import re
def extract_qa(s):
    """Extract the question and answer strings from a model reply.

    Questions are introduced by a ``#`` marker and answers by an ``@``
    marker. The markers themselves are not part of the returned strings.
    Both lists are also printed to stdout for inspection.

    Args:
        s: Reply text to scan.

    Returns:
        A ``(questions, answers)`` tuple of two lists of strings.
    """
    # A question runs from "#" up to the next "#" or "@" marker (or the end
    # of the text). The previous pattern, r'#([^#]*?)', was lazy with nothing
    # after it, so it always matched the empty string and every question was
    # returned as ''.
    questions = re.findall(r'#([^#@]+)', s)
    # An answer runs from "@" up to and including the first full stop "。".
    answers = re.findall(r'@([^@]*?。)', s)
    print("Q:", questions)
    print("A:", answers)
    return questions, answers
def filter_short_answers(questions, answers, min_length=8):
    """Drop question/answer pairs in which either text is too short.

    Questions and answers are paired by position. A pair is kept only when
    both the question and the answer have at least ``min_length``
    characters. If the two lists differ in length, the surplus items of the
    longer one are ignored (``zip`` stops at the shorter list).

    Args:
        questions: List of question strings.
        answers: List of answer strings, aligned with ``questions``.
        min_length: Minimum number of characters required of both texts
            (default 8).

    Returns:
        A ``(filtered_questions, filtered_answers)`` tuple of two lists that
        stay aligned with each other.
    """
    kept_pairs = [
        (question, answer)
        for question, answer in zip(questions, answers)
        if len(question) >= min_length and len(answer) >= min_length
    ]
    filtered_questions = [question for question, _ in kept_pairs]
    filtered_answers = [answer for _, answer in kept_pairs]
    return filtered_questions, filtered_answers
def write_to_file(filename, data_list):
    """Append every string in ``data_list`` to ``filename``, one per line.

    The file is opened in append mode with UTF-8 encoding, so content that
    is already in the file is preserved.

    Args:
        filename: Path of the file to append to.
        data_list: Iterable of strings; a newline is added after each one.
    """
    with open(filename, 'a', encoding='utf-8') as file:
        # One batched call instead of one write() per item.
        file.writelines(item + '\n' for item in data_list)
# Folder holding the documents to turn into question/answer pairs.
data = "/home/zhangxj/WorkFile/LCA-GPT/split_LCAdata/folder4"
# Sorted so that repeated runs process the documents in the same order.
docs = sorted(os.listdir(data))

# SECURITY: the DashScope API key used to be hard-coded here. A key that has
# been committed to source control should be treated as leaked and revoked.
# It is now read from the environment instead.
api_key = os.environ.get("DASHSCOPE_API_KEY")
if not api_key:
    raise SystemExit(
        "Set the DASHSCOPE_API_KEY environment variable before running this script."
    )

llm_cfg = {
    'model': 'qwen-plus',
    'model_server': 'dashscope',
    'api_key': api_key,
}

# NOTE(review): the last line of the prompt asks for answers that end with
# "" (an empty pair of quotes), while extract_qa() only recognises answers
# ending in "。". The character between the quotes may have been lost;
# confirm against the intended prompt before changing this string.
system_instruction = '''你是一位专注于生命周期分析LCA领域的数据分析助手。在LCA领域的目标和范围定义、数据清单收集和分析、生命周期影响评价、结果分析和政策建议等方面有着丰富的经验和知识。
请根据下面的文档提出10个问题及其相应的答案规定每个问题的字符数量为x答案的字符数量为y
"x>40 & x<70;y>40 & y<70"
10个question结果的输出为10个字符串"#问题1:"开头
10个对应的answer结果输出为10个字符串"@答案1:"开头答案以""结尾,不要换行不要换行
'''

tools = ['code_interpreter']  # Only referenced by the disabled function_list argument below.

# Request text and output files are the same for every document.
prompt = "分析这篇文章并按照格式输出10个问题和相应的答案。"
file1 = "/home/zhangxj/WorkFile/LCA-GPT/QA/ques.txt"
file2 = "/home/zhangxj/WorkFile/LCA-GPT/QA/answer.txt"

# Every question/answer pair kept so far, across all documents.
questions = []
answers = []

# Process each document
for doc in docs:
    doc_path = os.path.join(data, doc)
    files = [doc_path]

    # Start a fresh history for every document. Previously one shared list
    # gained another copy of the prompt on each iteration, so later requests
    # carried a growing stack of identical user messages.
    messages = [{'role': 'user', 'content': prompt}]

    assistant = Assistant(llm=llm_cfg,
                          system_message=system_instruction,
                          # function_list=tools,
                          files=files)

    # Exhaust the generator and keep only the last value it yields.
    response = []
    for response in assistant.run(messages=messages):
        pass

    if not response:
        # Nothing was yielded for this document; skip it instead of failing
        # on response[0] below.
        print(f"No response for {doc_path}; skipping.")
        time.sleep(3)
        continue

    content = response[0]['content']
    content = content.replace('\n', '')
    print(content)

    question, answer = extract_qa(content)
    filterq, filtera = filter_short_answers(question, answer)
    questions.extend(filterq)
    answers.extend(filtera)

    # Append this document's pairs right away so that results already
    # obtained are on disk even if a later document fails.
    write_to_file(file1, filterq)
    write_to_file(file2, filtera)

    # Pause for a while to avoid hitting API rate limits
    time.sleep(3)