6.0 KiB
6.0 KiB
In [ ]:
# Locate the repository root by walking upward until a .git directory is
# found, then make the project's data/rag helpers importable.
import sys
from pathlib import Path

root_path = Path(".").absolute()
while not (root_path / ".git").exists():
    # Guard: at the filesystem root, .parent is the root itself, so the
    # unguarded loop would spin forever when run outside a git checkout.
    if root_path == root_path.parent:
        raise FileNotFoundError("no .git directory found in any parent directory")
    root_path = root_path.parent
sys.path.append(str(root_path / "data" / "rag"))
from rag_utils import logs_path, constraint_path
In [ ]:
import re import json import pandas as pd
Find all log paths
In [ ]:
# Collect exactly one gpt-4o run directory per numbered problem folder;
# when a problem has several runs, keep the most recently modified one.
problem_paths = sorted(logs_path.glob("*/"), key=lambda x: int(x.name))
problem_paths[:5]

real_logs = []
skipped = []
for problem_dir in problem_paths:
    candidates = list(problem_dir.glob("run*_gpt-4o*"))
    if not candidates:
        skipped.append(problem_dir)
        continue
    # max() over st_mtime picks the newest run; with a single candidate it
    # simply returns that one, so both branches collapse into one call.
    real_logs.append(max(candidates, key=lambda p: p.stat().st_mtime))
assert len(skipped) == 0
In [ ]:
# Sanity-check every log folder: print the highest-numbered file it holds,
# and remember any folder that turned out to contain no files at all.
missing_files_in_log_folder = []
for log_dir in real_logs:
    # Sort by the digits embedded in each filename; the "0" prefix makes
    # digit-free names sort as 0 instead of raising on int("").
    entries = sorted(log_dir.glob("*"), key=lambda x: int("0" + "".join(re.findall("\\d", x.name))))
    if not entries:
        missing_files_in_log_folder.append(log_dir)
        continue
    print(entries[-1].name)
missing_files_in_log_folder
In [ ]:
def extract_solution_instance_status(file_path: Path) -> list[int]:
    """Return the instance numbers whose status column reads "Solved".

    Expects the first line of *file_path* to be a header; every later line
    is whitespace-separated with the instance number in column 0 and the
    status in column 3. Malformed/short rows are skipped.
    """
    solved = []
    for line in file_path.read_text().splitlines()[1:]:  # [1:] skips the header
        parts = line.split()
        # Single pass: the original built (number, bool) tuples and then
        # filtered them in a second pass; collecting directly is simpler.
        if len(parts) >= 4 and parts[3] == "Solved":
            solved.append(int(parts[0]))
    return solved

# Path to the input file
file_path = logs_path/'status.txt'
file_path
# Extract the solution instance number and Status
solved_problems = extract_solution_instance_status(file_path)
solved_problems
In [ ]:
# For every solved problem whose run reached state 6, gather the objective,
# its constraints, the labels, and the problem description into one record.
problem_objectives_formulations_and_labels = []
for log_path in real_logs:
    state_6_path = log_path / "state_6_code.json"
    if not state_6_path.exists():
        continue
    # Only keep problems whose instance number appears in the solved set.
    if int(log_path.parent.name) not in solved_problems:
        continue
    labels = json.loads((log_path.parent / "labels.json").read_text())
    description = (log_path.parent / "desc.txt").read_text()
    data = json.loads(state_6_path.read_text())
    constraints = [
        {"description": c["description"], "formulation": c["formulation"]}
        for c in data["constraints"]
    ]
    problem_objectives_formulations_and_labels.append({
        "objective_description": data["objective"]["description"],
        "objective_formulation": data["objective"]["formulation"],
        "constraints": constraints,
        "labels": labels,
        "description": description,
        "problem_name": log_path.parent.name,
    })
problem_objectives_formulations_and_labels[0]
In [ ]:
# Flatten to one row per (problem, constraint) pair and persist as a pickle.
constraint_data = [
    {
        "objective_description": record["objective_description"],
        "objective_formulation": record["objective_formulation"],
        "constraint_description": con["description"],
        "constraint_formulation": con["formulation"],
        "labels": record["labels"],
        "description": record["description"],
        "problem_name": record["problem_name"],
    }
    for record in problem_objectives_formulations_and_labels
    for con in record["constraints"]
]
# Kept for parity with the original cell's running tally.
total_constraints = sum(
    len(record["constraints"]) for record in problem_objectives_formulations_and_labels
)
constraints_df = pd.DataFrame(constraint_data)
constraints_df.to_pickle(constraint_path)