LCA-LLM/LCA_RAG/batchGLM.ipynb

11 KiB
Raw Permalink Blame History

In [5]:
# 创建批处理xlsx文件

import pandas as pd

customid = 1
method = "POST"
url = "/v4/chat/completions"
model = "glm-4"
role = "system"
instruction = "你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题不要列出几点来回答不需要换行只需要用1句话回答问题。"

temperature = 0.95
top_p = 0.7
max_tokens = 4096

df = pd.DataFrame(columns=["custom_id","method","url","model","role","content","role1","content1","temperature","top_p","max_tokens"])
  
In [4]:
question  = []
with open("/home/zhangxj/WorkFile/LCA-GPT/QA/filters/question.txt","r",encoding="utf-8") as file:
    for line in file.readlines():
        question.append(line.strip())
In [12]:
data = []
for ques in question:
    row = {
        "custom_id": f"request-{customid}",
        "method":method,
        "url":url,
        "model":model,
        "role":role,
        "content":instruction,
        "role1":"user",
        "content1":ques,
        "temperature":temperature,
        "top_p":top_p,
        "max_tokens":max_tokens
    }
    data.append(row)
    customid+=1
In [13]:
len(data)
Out[13]:
3933
In [14]:
df = pd.DataFrame(data)
In [15]:
df.head()

# "custom_id","method","url","model","role","content","role1","content1","temperature","top_p","max_tokens"])
Out[15]:
custom_id method url model role content role1 content1 temperature top_p max_tokens
0 request-1 POST /v4/chat/completions glm-4 system 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user 什么是生命周期分析LCA的主要目标 0.95 0.7 4096
1 request-2 POST /v4/chat/completions glm-4 system 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user 在LCA中如何确定研究的范围 0.95 0.7 4096
2 request-3 POST /v4/chat/completions glm-4 system 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user 医疗废物如何处理? 0.95 0.7 4096
3 request-4 POST /v4/chat/completions glm-4 system 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user LCA数据清单收集阶段需要哪些信息 0.95 0.7 4096
4 request-5 POST /v4/chat/completions glm-4 system 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user 生命周期影响评价阶段的目标是什么? 0.95 0.7 4096
In [16]:
df.to_excel("/home/zhangxj/WorkFile/LCA-GPT/QA/questionForBatch.xlsx",index=False)
In [20]:
from zhipuai import ZhipuAI
 
client = ZhipuAI(api_key="434790cf952335f18b6347e7b6de9777.V50p55zfk8Ye4ojV")  # 填写您自己的APIKey

create = client.batches.create(
    input_file_id="1723556210_f79e4160ab3840b4b02f44c821d27752",
    endpoint="/v4/chat/completions", 
    completion_window="24h", #完成时间只支持 24 小时
    metadata={
        "description": "回答问题"
    }
)
print(create)
Batch(id='batch_1823353255129645056', completion_window='24h', created_at=1723556266945, endpoint='/v4/chat/completions', input_file_id='1723556210_f79e4160ab3840b4b02f44c821d27752', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': '回答问题'}, output_file_id=None, request_counts=BatchRequestCounts(completed=None, failed=None, total=3933))
In [21]:
batch_job = client.batches.retrieve("batch_id")
print(batch_job)
Batch(id=None, completion_window=None, created_at=None, endpoint=None, input_file_id=None, object=None, status=None, cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=None)
In [ ]: