104 lines
3.4 KiB
Python
104 lines
3.4 KiB
Python
import os
|
||
import time
|
||
from qwen_agent.agents import Assistant
|
||
from pprint import pprint
|
||
import json
|
||
import re
|
||
|
||
'''
|
||
生成QAdata,后续不用
|
||
'''
|
||
def extract_qa(s):
    """Extract question and answer strings from one model reply.

    A question is the shortest run of text that starts right after a ``#``
    and ends at a question mark; an answer is the shortest run that starts
    right after an ``@`` and ends at the ideographic full stop. The ``#``
    and ``@`` markers themselves are not part of the returned strings.

    Args:
        s: The reply text to scan.

    Returns:
        A ``(questions, answers)`` tuple of two lists of strings. The two
        lists are extracted independently and may differ in length.
    """
    # Questions terminated by an ASCII '?' are preferred, as before.
    questions = re.findall(r'#([^#]*?\?)', s)
    if not questions:
        # Fall back to the full-width question mark (U+FF1F). It is written
        # as an escape so it cannot be confused with (or degraded into) an
        # ASCII '?': the pattern '[^#]*??' is not a valid regex and raises
        # re.error ("multiple repeat").
        questions = re.findall(r'#([^#]*?\uff1f)', s)

    answers = re.findall(r'@([^@]*?\。)', s)

    # Echo what was found so a run can be followed on the console.
    print("Q:", questions)
    print("A:", answers)
    return questions, answers
|
||
|
||
# Drop question/answer pairs in which either string is shorter than min_length (default 8)
|
||
def filter_short_answers(questions, answers, min_length=8):
    """Keep only the question/answer pairs that are both long enough.

    Questions and answers are paired positionally with ``zip``, so any
    surplus items in the longer list are ignored. A pair survives only when
    the answer and the question each have at least ``min_length`` characters.

    Args:
        questions: List of question strings.
        answers: List of answer strings, aligned with ``questions``.
        min_length: Minimum length required of both strings in a pair.

    Returns:
        A ``(filtered_questions, filtered_answers)`` tuple of two lists that
        stay aligned with each other.
    """
    surviving_pairs = [
        (q_text, a_text)
        for q_text, a_text in zip(questions, answers)
        if not (len(a_text) < min_length or len(q_text) < min_length)
    ]
    filtered_questions = [q_text for q_text, _ in surviving_pairs]
    filtered_answers = [a_text for _, a_text in surviving_pairs]
    return filtered_questions, filtered_answers
|
||
|
||
def write_to_file(filename, data_list):
    """Append each string of ``data_list`` to ``filename`` on its own line.

    The file is opened in append mode with UTF-8 encoding, so existing
    content is kept and the file is created if it does not exist.

    Args:
        filename: Path of the file to append to.
        data_list: Iterable of strings; a newline is added after each one.
    """
    with open(filename, mode='a', encoding='utf-8') as sink:
        sink.writelines(entry + '\n' for entry in data_list)
|
||
|
||
|
||
# Driver script: for every document in `data`, ask the model for ten LCA
# question/answer pairs, parse and filter them, and append the survivors to
# the two output text files.

data = "/home/zhangxj/WorkFile/LCA-GPT/split_LCAdata/folder6"
# os.listdir returns entries in arbitrary order; sort so runs are repeatable.
docs = sorted(os.listdir(data))

llm_cfg = {
    'model': 'qwen-plus',
    'model_server': 'dashscope',
    # SECURITY: the key is read from the environment instead of being
    # committed to the source. Any key that was previously hard-coded here
    # must be treated as leaked and rotated.
    # NOTE(review): Qwen-Agent is expected to fall back to DASHSCOPE_API_KEY
    # on its own when this value is empty - confirm against the installed
    # version.
    'api_key': os.environ.get('DASHSCOPE_API_KEY', ''),
}

system_instruction = '''你是一位专注于生命周期分析(LCA)领域的数据分析助手。在LCA领域的目标和范围定义、数据清单收集和分析、生命周期影响评价、结果分析和政策建议等方面有着丰富的经验和知识。
请根据下面的文档提出10个问题及其相应的答案,规定每个问题的字符数量为x,答案的字符数量为y,
"x>40 & x<70;y>40 & y<70"
10个question结果的输出为10个字符串,以"#问题1:"开头;
10个对应的answer结果输出为10个字符串,以"@答案1:"开头,答案以"。"结尾,不要换行。

'''

tools = ['code_interpreter']  # Built-in code-execution tool; currently not passed to the agent.
messages = []  # Chat history sent to the agent; rebuilt for every document below.
questions = []  # All accepted questions gathered during this run.
answers = []  # All accepted answers gathered during this run.

# Output files; they are appended to after every document.
file1 = "/home/zhangxj/WorkFile/LCA-GPT/QA/originData/ques.txt"
file2 = "/home/zhangxj/WorkFile/LCA-GPT/QA/originData/answer.txt"

# Process each document
for doc in docs:
    doc_path = os.path.join(data, doc)
    files = [doc_path]
    prompt = "分析这篇文章,根据文章研究的内容,并按照格式输出10个与LCA领域相关的问题和相应的答案。"

    # Start from a fresh history for every document. Appending to one shared
    # list made document N carry N copies of the same user prompt.
    messages = [{'role': 'user', 'content': prompt}]

    assistant = Assistant(llm=llm_cfg,
                          system_message=system_instruction,
                          # function_list=tools,
                          files=files)

    # Drain the generator returned by run(); only the last value it yields
    # is used.
    response = []
    for response in assistant.run(messages=messages):
        pass

    if response:
        content = response[0]['content']
        content = content.replace('\n', '')
        print(content)

        question, answer = extract_qa(content)
        filterq, filtera = filter_short_answers(question, answer)
        questions.extend(filterq)
        answers.extend(filtera)

        # Persist right away so finished documents survive a later failure.
        write_to_file(file1, filterq)
        write_to_file(file2, filtera)
    else:
        # Nothing was yielded, so there is no reply to index into.
        print("No response received for:", doc_path)

    # Pause for a while to avoid hitting API rate limits
    time.sleep(3)
|
||
|
||
# Print the final results
|
||
# print("Final Questions List:")
|
||
# pprint(questions)
|
||
|
||
# print("\nFinal Answers List:")
|
||
# pprint(answers)
|
||
|
||
|
||
|