LCA-LLM/LCA_RAG/batchGLM.ipynb

328 lines
11 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# 创建批处理xlsx文件\n",
"\n",
"import pandas as pd\n",
"\n",
"customid = 1\n",
"method = \"POST\"\n",
"url = \"/v4/chat/completions\"\n",
"model = \"glm-4\"\n",
"role = \"system\"\n",
"instruction = \"你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题不要列出几点来回答不需要换行只需要用1句话回答问题。\"\n",
"\n",
"temperature = 0.95\n",
"top_p = 0.7\n",
"max_tokens = 4096\n",
"\n",
"df = pd.DataFrame(columns=[\"custom_id\",\"method\",\"url\",\"model\",\"role\",\"content\",\"role1\",\"content1\",\"temperature\",\"top_p\",\"max_tokens\"])\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Read the question file and keep one whitespace-stripped entry per line.\n",
"with open(\"/home/zhangxj/WorkFile/LCA-GPT/QA/filters/question.txt\",\"r\",encoding=\"utf-8\") as file:\n",
"    question = [raw_line.strip() for raw_line in file]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Build one batch-request row (a dict keyed by spreadsheet column) per question.\n",
"# The request number is derived with enumerate(), starting at `customid`, instead of\n",
"# incrementing the global counter, so re-running this cell always produces the same\n",
"# custom_id values rather than continuing from the previous run.\n",
"data = [\n",
"    {\n",
"        \"custom_id\": f\"request-{request_no}\",\n",
"        \"method\": method,\n",
"        \"url\": url,\n",
"        \"model\": model,\n",
"        \"role\": role,  # system message role\n",
"        \"content\": instruction,  # system message text\n",
"        \"role1\": \"user\",\n",
"        \"content1\": ques,  # the question itself\n",
"        \"temperature\": temperature,\n",
"        \"top_p\": top_p,\n",
"        \"max_tokens\": max_tokens,\n",
"    }\n",
"    for request_no, ques in enumerate(question, start=customid)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3933"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>custom_id</th>\n",
" <th>method</th>\n",
" <th>url</th>\n",
" <th>model</th>\n",
" <th>role</th>\n",
" <th>content</th>\n",
" <th>role1</th>\n",
" <th>content1</th>\n",
" <th>temperature</th>\n",
" <th>top_p</th>\n",
" <th>max_tokens</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>request-1</td>\n",
" <td>POST</td>\n",
" <td>/v4/chat/completions</td>\n",
" <td>glm-4</td>\n",
" <td>system</td>\n",
" <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要...</td>\n",
" <td>user</td>\n",
" <td>什么是生命周期分析LCA的主要目标</td>\n",
" <td>0.95</td>\n",
" <td>0.7</td>\n",
" <td>4096</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>request-2</td>\n",
" <td>POST</td>\n",
" <td>/v4/chat/completions</td>\n",
" <td>glm-4</td>\n",
" <td>system</td>\n",
" <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要...</td>\n",
" <td>user</td>\n",
" <td>在LCA中如何确定研究的范围</td>\n",
" <td>0.95</td>\n",
" <td>0.7</td>\n",
" <td>4096</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>request-3</td>\n",
" <td>POST</td>\n",
" <td>/v4/chat/completions</td>\n",
" <td>glm-4</td>\n",
" <td>system</td>\n",
" <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要...</td>\n",
" <td>user</td>\n",
" <td>医疗废物如何处理?</td>\n",
" <td>0.95</td>\n",
" <td>0.7</td>\n",
" <td>4096</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>request-4</td>\n",
" <td>POST</td>\n",
" <td>/v4/chat/completions</td>\n",
" <td>glm-4</td>\n",
" <td>system</td>\n",
" <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要...</td>\n",
" <td>user</td>\n",
" <td>LCA数据清单收集阶段需要哪些信息</td>\n",
" <td>0.95</td>\n",
" <td>0.7</td>\n",
" <td>4096</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>request-5</td>\n",
" <td>POST</td>\n",
" <td>/v4/chat/completions</td>\n",
" <td>glm-4</td>\n",
" <td>system</td>\n",
" <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要...</td>\n",
" <td>user</td>\n",
" <td>生命周期影响评价阶段的目标是什么?</td>\n",
" <td>0.95</td>\n",
" <td>0.7</td>\n",
" <td>4096</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" custom_id method url model role \\\n",
"0 request-1 POST /v4/chat/completions glm-4 system \n",
"1 request-2 POST /v4/chat/completions glm-4 system \n",
"2 request-3 POST /v4/chat/completions glm-4 system \n",
"3 request-4 POST /v4/chat/completions glm-4 system \n",
"4 request-5 POST /v4/chat/completions glm-4 system \n",
"\n",
" content role1 \\\n",
"0 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user \n",
"1 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user \n",
"2 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user \n",
"3 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user \n",
"4 你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题;不要列出几点来回答,不需要... user \n",
"\n",
" content1 temperature top_p max_tokens \n",
"0 什么是生命周期分析LCA的主要目标 0.95 0.7 4096 \n",
"1 在LCA中如何确定研究的范围 0.95 0.7 4096 \n",
"2 医疗废物如何处理? 0.95 0.7 4096 \n",
"3 LCA数据清单收集阶段需要哪些信息 0.95 0.7 4096 \n",
"4 生命周期影响评价阶段的目标是什么? 0.95 0.7 4096 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()\n",
"\n",
"# \"custom_id\",\"method\",\"url\",\"model\",\"role\",\"content\",\"role1\",\"content1\",\"temperature\",\"top_p\",\"max_tokens\"])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Write the batch table to an .xlsx workbook; index=False keeps the row index out of the sheet.\n",
"df.to_excel(\n",
"    \"/home/zhangxj/WorkFile/LCA-GPT/QA/questionForBatch.xlsx\",\n",
"    index=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch(id='batch_1823353255129645056', completion_window='24h', created_at=1723556266945, endpoint='/v4/chat/completions', input_file_id='1723556210_f79e4160ab3840b4b02f44c821d27752', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': '回答问题'}, output_file_id=None, request_counts=BatchRequestCounts(completed=None, failed=None, total=3933))\n"
]
}
],
"source": [
"import os\n",
"\n",
"from zhipuai import ZhipuAI\n",
"\n",
"# Read the API key from the environment instead of embedding it in the notebook.\n",
"# A missing ZHIPUAI_API_KEY raises KeyError here, before any request is sent.\n",
"# NOTE(review): the key that used to be hardcoded on this line is exposed in the\n",
"# notebook's history and should be revoked.\n",
"client = ZhipuAI(api_key=os.environ[\"ZHIPUAI_API_KEY\"])\n",
"\n",
"# Submit a batch job against the chat-completions endpoint.\n",
"# NOTE(review): input_file_id presumably refers to a file uploaded outside this\n",
"# notebook - confirm it matches the exported xlsx/jsonl before re-running.\n",
"create = client.batches.create(\n",
"    input_file_id=\"1723556210_f79e4160ab3840b4b02f44c821d27752\",\n",
"    endpoint=\"/v4/chat/completions\",\n",
"    completion_window=\"24h\",  # only a 24-hour completion window is supported\n",
"    metadata={\n",
"        \"description\": \"回答问题\"\n",
"    }\n",
")\n",
"print(create)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch(id=None, completion_window=None, created_at=None, endpoint=None, input_file_id=None, object=None, status=None, cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=None)\n"
]
}
],
"source": [
"# Look up the batch job by the id returned from client.batches.create().\n",
"# The literal string \"batch_id\" used before was a placeholder, not a real id,\n",
"# and the call returned a Batch object whose fields were all None.\n",
"batch_job = client.batches.retrieve(create.id)\n",
"print(batch_job)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Qwen",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}