182 lines
6.0 KiB
Plaintext
182 lines
6.0 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "cdbca093aac051e26e17272d0e199954-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import sys\n",
|
||
|
"from pathlib import Path\n",
|
||
|
"root_path = Path(\".\")\n",
|
||
|
"while not (root_path/\".git\").exists():\n",
|
||
|
" root_path = root_path.absolute().parent\n",
|
||
|
"sys.path.append(str(root_path/\"data\"/\"rag\"))\n",
|
||
|
"from rag_utils import logs_path, constraint_path"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "998bf3c5f3ec65213e0dcf42fc88eb63-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import re\n",
|
||
|
"import json\n",
|
||
|
"import pandas as pd"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "f89dc0bd1f3e913c2fe639bdc02fd74f-1",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Find all log paths"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "0e3bbe0261cd1d62992907e8d71be8d4-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"problem_paths = sorted(logs_path.glob(\"*/\"), key=lambda x: int(x.name))\n",
|
||
|
"problem_paths[:5]\n",
|
||
|
"\n",
|
||
|
"real_logs = []\n",
|
||
|
"skipped = []\n",
|
||
|
"\n",
|
||
|
"for problem_path in problem_paths:\n",
|
||
|
" gpt4o_log_path = list(problem_path.glob(\"run*_gpt-4o*\"))\n",
|
||
|
" \n",
|
||
|
" if len(gpt4o_log_path) == 0:\n",
|
||
|
" skipped.append(problem_path)\n",
|
||
|
" elif len(gpt4o_log_path) > 1:\n",
|
||
|
" newest_log_path = max(gpt4o_log_path, key=lambda p: p.stat().st_mtime)\n",
|
||
|
" real_logs.append(newest_log_path)\n",
|
||
|
" else:\n",
|
||
|
" real_logs.append(gpt4o_log_path[0])\n",
|
||
|
"\n",
|
||
|
"assert len(skipped) == 0"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "4f22e22f10ae3fce36d0d1f69fb0eeac-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"missing_files_in_log_folder = []\n",
|
||
|
"\n",
|
||
|
"for idx in range(len(real_logs)):\n",
|
||
|
" # idx = random.randint(0, len(real_logs))\n",
|
||
|
" names = sorted(real_logs[idx].glob(\"*\"), key=lambda x: int(\"0\"+\"\".join(re.findall(\"\\\\d\", x.name))))\n",
|
||
|
" if len(names) == 0:\n",
|
||
|
" missing_files_in_log_folder.append(real_logs[idx])\n",
|
||
|
" continue\n",
|
||
|
" print([x.name for x in names][-1])\n",
|
||
|
" # print()\n",
|
||
|
"\n",
|
||
|
"missing_files_in_log_folder"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "d374627207c37fd7424a9aeccda1c4e2-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def extract_solution_instance_status(file_path: Path):\n",
|
||
|
" lines = file_path.read_text().splitlines()\n",
|
||
|
" \n",
|
||
|
" extracted_data = []\n",
|
||
|
"\n",
|
||
|
" for i, line in enumerate(lines):\n",
|
||
|
" if i == 0:\n",
|
||
|
" continue\n",
|
||
|
" parts = line.split()\n",
|
||
|
" if len(parts) >= 4:\n",
|
||
|
" instance_number = int(parts[0])\n",
|
||
|
" status = parts[3] == \"Solved\"\n",
|
||
|
" extracted_data.append((instance_number, status))\n",
|
||
|
"\n",
|
||
|
" return [x[0] for x in extracted_data if x[1]]\n",
|
||
|
"\n",
|
||
|
"# Path to the input file\n",
|
||
|
"file_path = logs_path/'status.txt'\n",
|
||
|
"file_path\n",
|
||
|
"\n",
|
||
|
"# Extract the solution instance number and Status\n",
|
||
|
"solved_problems = extract_solution_instance_status(file_path)\n",
|
||
|
"solved_problems"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "0d721badf71c5a6669badeab3acf97dd-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"problem_objectives_formulations_and_labels = []\n",
|
||
|
"for log_path in real_logs:\n",
|
||
|
" if (state_6_path := (log_path/\"state_6_code.json\")).exists():\n",
|
||
|
" if int(log_path.parent.name) not in solved_problems:\n",
|
||
|
" continue\n",
|
||
|
" labels = json.loads((log_path.parent/\"labels.json\").read_text())\n",
|
||
|
" description = (log_path.parent/\"desc.txt\").read_text()\n",
|
||
|
" data = json.loads(state_6_path.read_text())\n",
|
||
|
" objective_description = data[\"objective\"][\"description\"]\n",
|
||
|
" objective_formulation = data[\"objective\"][\"formulation\"]\n",
|
||
|
" constraints = []\n",
|
||
|
" for constraint in data[\"constraints\"]:\n",
|
||
|
" constraints.append({\"description\": constraint[\"description\"], \"formulation\": constraint[\"formulation\"]})\n",
|
||
|
" problem_objectives_formulations_and_labels.append({\n",
|
||
|
" \"objective_description\": objective_description,\n",
|
||
|
" \"objective_formulation\": objective_formulation,\n",
|
||
|
" \"constraints\": constraints,\n",
|
||
|
" \"labels\": labels,\n",
|
||
|
" \"description\": description,\n",
|
||
|
" \"problem_name\": log_path.parent.name\n",
|
||
|
" })\n",
|
||
|
"\n",
|
||
|
"problem_objectives_formulations_and_labels[0]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "571b12c459bf920f6c2d04a7b97ffa8f-1",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"constraint_data = []\n",
|
||
|
"total_constraints = 0\n",
|
||
|
"for data in problem_objectives_formulations_and_labels:\n",
|
||
|
" total_constraints += len(data[\"constraints\"])\n",
|
||
|
" for data_constraint in data[\"constraints\"]:\n",
|
||
|
" constraint_data.append({\n",
|
||
|
" \"objective_description\": data[\"objective_description\"],\n",
|
||
|
" \"objective_formulation\": data[\"objective_formulation\"],\n",
|
||
|
" \"constraint_description\": data_constraint[\"description\"],\n",
|
||
|
" \"constraint_formulation\": data_constraint[\"formulation\"],\n",
|
||
|
" \"labels\": data[\"labels\"],\n",
|
||
|
" \"description\": data[\"description\"],\n",
|
||
|
" \"problem_name\": data[\"problem_name\"]\n",
|
||
|
" })\n",
|
||
|
"\n",
|
||
|
"constraints_df = pd.DataFrame(constraint_data)\n",
|
||
|
"constraints_df.to_pickle(constraint_path)"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|