# Source: LCA-LLM/DataAnalysis/SplitData.py (Python); revision dated 2024-07-30 10:11:41 +08:00.
import os
import shutil
# Folder that holds the original PDF files.
original_folder = '/home/zhangxj/WorkFile/LCA-GPT/LCAdata'
# Base folder under which the per-group sub-folders (folder1, folder2, ...) are created.
base_new_folder = '/home/zhangxj/WorkFile/LCA-GPT/split_LCAdata'
# Number of groups the PDF files are distributed over.
num_groups = 6


def _split_evenly(items, group_count):
    """Split *items* into *group_count* contiguous groups of near-equal size.

    The first ``len(items) % group_count`` groups receive one extra item, so
    group sizes differ by at most one and the input order is preserved.

    Args:
        items: Sequence to split (must support ``len`` and slicing).
        group_count: Number of groups to produce.

    Returns:
        A list with exactly *group_count* slices of *items*.

    Raises:
        ValueError: If *group_count* is not positive.
    """
    if group_count <= 0:
        raise ValueError("group_count must be a positive integer")
    base_size, extra = divmod(len(items), group_count)
    groups = []
    start = 0
    for index in range(group_count):
        # The first `extra` groups absorb the remainder, one item each.
        size = base_size + 1 if index < extra else base_size
        groups.append(items[start:start + size])
        start += size
    return groups


def split_pdf_files(source_folder=original_folder,
                    target_base=base_new_folder,
                    group_count=num_groups):
    """Copy the PDF files of *source_folder* into evenly sized group folders.

    The files are distributed over ``folder1`` ... ``folder<group_count>``
    below *target_base*; the folders are created when missing.

    Args:
        source_folder: Directory that is scanned (non-recursively) for
            entries whose name ends with ``.pdf``.
        target_base: Directory under which the group folders are created.
        group_count: Number of group folders to fill.

    Returns:
        The list of groups, each a list of the file names copied into the
        corresponding folder.

    Raises:
        ValueError: If *group_count* is not positive.
        OSError: If *source_folder* cannot be listed or a copy fails.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # file-to-folder assignment stable across runs, so a re-run does not
    # scatter duplicates over folders that already exist.
    # Directories that merely end in ".pdf" are skipped because shutil.copy
    # cannot copy them.
    pdf_files = sorted(
        name for name in os.listdir(source_folder)
        if name.endswith('.pdf')
        and os.path.isfile(os.path.join(source_folder, name))
    )

    # Group sizes are correct by construction (they differ by at most one),
    # so no separate size check is needed afterwards.
    groups = _split_evenly(pdf_files, group_count)

    for number, group in enumerate(groups, start=1):
        # Create the destination folder for this group.
        new_folder = os.path.join(target_base, f'folder{number}')
        os.makedirs(new_folder, exist_ok=True)
        # Copy every file of the group into it.
        for file_name in group:
            shutil.copy(os.path.join(source_folder, file_name), new_folder)
    return groups


if __name__ == "__main__":
    split_pdf_files()
    print("文件分组和复制完成。")