LCA-LLM/LCA_RAG/batchGLM.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 创建批处理xlsx文件\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "customid = 1\n",
    "method = \"POST\"\n",
    "url = \"/v4/chat/completions\"\n",
    "model = \"glm-4\"\n",
    "role = \"system\"\n",
    "instruction = \"你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要换行，只需要用1句话回答问题。\"\n",
    "\n",
    "temperature = 0.95\n",
    "top_p = 0.7\n",
    "max_tokens = 4096\n",
    "\n",
    "df = pd.DataFrame(columns=[\"custom_id\",\"method\",\"url\",\"model\",\"role\",\"content\",\"role1\",\"content1\",\"temperature\",\"top_p\",\"max_tokens\"])\n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "question  = []\n",
    "with open(\"/home/zhangxj/WorkFile/LCA-GPT/QA/filters/question.txt\",\"r\",encoding=\"utf-8\") as file:\n",
    "    for line in file.readlines():\n",
    "        question.append(line.strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = []\n",
    "for ques in question:\n",
    "    row = {\n",
    "        \"custom_id\": f\"request-{customid}\",\n",
    "        \"method\":method,\n",
    "        \"url\":url,\n",
    "        \"model\":model,\n",
    "        \"role\":role,\n",
    "        \"content\":instruction,\n",
    "        \"role1\":\"user\",\n",
    "        \"content1\":ques,\n",
    "        \"temperature\":temperature,\n",
    "        \"top_p\":top_p,\n",
    "        \"max_tokens\":max_tokens\n",
    "    }\n",
    "    data.append(row)\n",
    "    customid+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3933"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>custom_id</th>\n",
       "      <th>method</th>\n",
       "      <th>url</th>\n",
       "      <th>model</th>\n",
       "      <th>role</th>\n",
       "      <th>content</th>\n",
       "      <th>role1</th>\n",
       "      <th>content1</th>\n",
       "      <th>temperature</th>\n",
       "      <th>top_p</th>\n",
       "      <th>max_tokens</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>request-1</td>\n",
       "      <td>POST</td>\n",
       "      <td>/v4/chat/completions</td>\n",
       "      <td>glm-4</td>\n",
       "      <td>system</td>\n",
       "      <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...</td>\n",
       "      <td>user</td>\n",
       "      <td>什么是生命周期分析（LCA）的主要目标？</td>\n",
       "      <td>0.95</td>\n",
       "      <td>0.7</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>request-2</td>\n",
       "      <td>POST</td>\n",
       "      <td>/v4/chat/completions</td>\n",
       "      <td>glm-4</td>\n",
       "      <td>system</td>\n",
       "      <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...</td>\n",
       "      <td>user</td>\n",
       "      <td>在LCA中，如何确定研究的范围？</td>\n",
       "      <td>0.95</td>\n",
       "      <td>0.7</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>request-3</td>\n",
       "      <td>POST</td>\n",
       "      <td>/v4/chat/completions</td>\n",
       "      <td>glm-4</td>\n",
       "      <td>system</td>\n",
       "      <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...</td>\n",
       "      <td>user</td>\n",
       "      <td>医疗废物如何处理？</td>\n",
       "      <td>0.95</td>\n",
       "      <td>0.7</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>request-4</td>\n",
       "      <td>POST</td>\n",
       "      <td>/v4/chat/completions</td>\n",
       "      <td>glm-4</td>\n",
       "      <td>system</td>\n",
       "      <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...</td>\n",
       "      <td>user</td>\n",
       "      <td>LCA数据清单收集阶段需要哪些信息？</td>\n",
       "      <td>0.95</td>\n",
       "      <td>0.7</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>request-5</td>\n",
       "      <td>POST</td>\n",
       "      <td>/v4/chat/completions</td>\n",
       "      <td>glm-4</td>\n",
       "      <td>system</td>\n",
       "      <td>你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...</td>\n",
       "      <td>user</td>\n",
       "      <td>生命周期影响评价阶段的目标是什么？</td>\n",
       "      <td>0.95</td>\n",
       "      <td>0.7</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   custom_id method                   url  model    role  \\\n",
       "0  request-1   POST  /v4/chat/completions  glm-4  system   \n",
       "1  request-2   POST  /v4/chat/completions  glm-4  system   \n",
       "2  request-3   POST  /v4/chat/completions  glm-4  system   \n",
       "3  request-4   POST  /v4/chat/completions  glm-4  system   \n",
       "4  request-5   POST  /v4/chat/completions  glm-4  system   \n",
       "\n",
       "                                             content role1  \\\n",
       "0  你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...  user   \n",
       "1  你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...  user   \n",
       "2  你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...  user   \n",
       "3  你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...  user   \n",
       "4  你是生命周期领域富有经验和知识的专家。根据你所掌握的知识回答问题；不要列出几点来回答，不需要...  user   \n",
       "\n",
       "               content1  temperature  top_p  max_tokens  \n",
       "0  什么是生命周期分析（LCA）的主要目标？         0.95    0.7        4096  \n",
       "1      在LCA中，如何确定研究的范围？         0.95    0.7        4096  \n",
       "2             医疗废物如何处理？         0.95    0.7        4096  \n",
       "3    LCA数据清单收集阶段需要哪些信息？         0.95    0.7        4096  \n",
       "4     生命周期影响评价阶段的目标是什么？         0.95    0.7        4096  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()\n",
    "\n",
    "# \"custom_id\",\"method\",\"url\",\"model\",\"role\",\"content\",\"role1\",\"content1\",\"temperature\",\"top_p\",\"max_tokens\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_excel(\"/home/zhangxj/WorkFile/LCA-GPT/QA/questionForBatch.xlsx\",index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Batch(id='batch_1823353255129645056', completion_window='24h', created_at=1723556266945, endpoint='/v4/chat/completions', input_file_id='1723556210_f79e4160ab3840b4b02f44c821d27752', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': '回答问题'}, output_file_id=None, request_counts=BatchRequestCounts(completed=None, failed=None, total=3933))\n"
     ]
    }
   ],
   "source": [
    "from zhipuai import ZhipuAI\n",
    " \n",
    "client = ZhipuAI(api_key=\"434790cf952335f18b6347e7b6de9777.V50p55zfk8Ye4ojV\")  # 填写您自己的APIKey\n",
    "\n",
    "create = client.batches.create(\n",
    "    input_file_id=\"1723556210_f79e4160ab3840b4b02f44c821d27752\",\n",
    "    endpoint=\"/v4/chat/completions\", \n",
    "    completion_window=\"24h\", #完成时间只支持 24 小时\n",
    "    metadata={\n",
    "        \"description\": \"回答问题\"\n",
    "    }\n",
    ")\n",
    "print(create)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Batch(id=None, completion_window=None, created_at=None, endpoint=None, input_file_id=None, object=None, status=None, cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=None, failed_at=None, finalizing_at=None, in_progress_at=None, metadata=None, output_file_id=None, request_counts=None)\n"
     ]
    }
   ],
   "source": [
    "batch_job = client.batches.retrieve(\"batch_id\")\n",
    "print(batch_job)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Qwen",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}