# GreenTransPowerCalculate/deeplabv3sdRenewable/tools/山东省地貌识别tools/潜力评估阶段/合并矢量转栅格(市级区域).py

import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
from tqdm import tqdm
import os

def _load_category_layers(file_paths, categories, category_values):
    """Read every vector layer, tag it with its category, and align it to the first layer's CRS.

    Returns a tuple ``(layers, base_crs)`` where ``layers`` is the list of
    GeoDataFrames in input order and ``base_crs`` is the CRS of the first layer.
    """
    layers = []
    base_crs = None
    progress = tqdm(zip(file_paths, categories), total=len(categories), desc="读取矢量文件进度")
    for i, (path, category) in enumerate(progress, 1):
        print(f"正在处理第{i}个矢量文件: {path}")
        gdf = gpd.read_file(path, engine="pyogrio")
        gdf['category'] = category  # human-readable class name
        gdf['value'] = category_values[category]  # numeric class burned into the raster
        gdf['value'] = gdf['value'].astype(int)
        if 'Shape_Area' in gdf.columns:
            # Cap the area attribute so it cannot overflow the output field.
            gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)

        # The first layer defines the reference CRS; later layers are reprojected to it.
        if i == 1:
            base_crs = gdf.crs
        elif gdf.crs != base_crs:
            print(f"第{i}个图层坐标系不同，正在转换为第一个图层的坐标系...")
            gdf = gdf.to_crs(base_crs)

        print(f"第{i}个图层 value 字段分布:", gdf['value'].value_counts(dropna=False))
        layers.append(gdf)
    return layers, base_crs


def _clean_value_field(merged_gdf, valid_values):
    """Coerce the ``value`` column to integers and replace NaN / out-of-range entries with 0."""
    print("正在清洗 'value' 字段数据...")
    if merged_gdf['value'].isna().any():
        print("警告: 'value' 字段包含 nan 值，将替换为 0")
        merged_gdf['value'] = merged_gdf['value'].fillna(0)
    # Go through float first so values read back as e.g. "3.0" still convert.
    merged_gdf['value'] = merged_gdf['value'].astype(float).astype(int)

    is_valid = merged_gdf['value'].isin(valid_values)
    if not is_valid.all():
        invalid = set(merged_gdf.loc[~is_valid, 'value'].unique())
        print(f"警告: 'value' 字段包含无效值: {invalid}，将替换为 0")
        merged_gdf['value'] = merged_gdf['value'].where(is_valid, 0)

    print("清洗后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
    return merged_gdf


def _rasterize_in_blocks(merged_gdf, output_raster_path, crs, raster_resolution, block_size):
    """Burn the ``value`` column of ``merged_gdf`` into a uint8 GeoTIFF, one block at a time."""
    print("正在构建空间索引以加速栅格化...")
    sindex = merged_gdf.sindex

    minx, miny, maxx, maxy = merged_gdf.total_bounds
    # Round up so the partial pixel row/column at the far edges is kept.
    width = int(np.ceil((maxx - minx) / raster_resolution))
    height = int(np.ceil((maxy - miny) / raster_resolution))
    if width <= 0 or height <= 0:
        raise ValueError(f"栅格尺寸无效: 宽度={width}, 高度={height}，检查范围或分辨率")

    # Anchor the grid at the upper-left corner with a pixel size of exactly
    # raster_resolution, so the file transform and the per-block transforms
    # below describe the same grid.
    transform = rasterio.transform.from_origin(minx, maxy, raster_resolution, raster_resolution)

    blocks_down = (height + block_size - 1) // block_size
    blocks_across = (width + block_size - 1) // block_size
    total_blocks = blocks_down * blocks_across
    print(f"总分块数: {total_blocks}")

    with rasterio.open(
            output_raster_path,
            'w',
            driver='GTiff',
            height=height,
            width=width,
            count=1,
            dtype=rasterio.uint8,
            crs=crs,
            transform=transform,
            nodata=0
    ) as dst, tqdm(total=total_blocks, desc="栅格化进度") as pbar:
        for y in range(0, height, block_size):
            block_height = min(block_size, height - y)
            block_maxy = maxy - y * raster_resolution
            block_miny = block_maxy - block_height * raster_resolution

            for x in range(0, width, block_size):
                block_width = min(block_size, width - x)
                block_minx = minx + x * raster_resolution
                block_maxx = block_minx + block_width * raster_resolution

                # Sort the hits: rasterize lets later shapes overwrite earlier
                # ones, so a fixed (layer concatenation) order keeps overlap
                # resolution identical in every block.
                hits = sorted(sindex.intersection((block_minx, block_miny, block_maxx, block_maxy)))
                if not hits:
                    pbar.update(1)
                    continue
                block_gdf = merged_gdf.iloc[hits]

                print(f"分块 (x={x}, y={y}) value 字段分布:", block_gdf['value'].value_counts(dropna=False))

                block_transform = rasterio.transform.from_origin(
                    block_minx, block_maxy, raster_resolution, raster_resolution
                )
                shapes = [
                    (mapping(geom), int(value))
                    for geom, value in zip(block_gdf.geometry, block_gdf['value'])
                ]
                block_raster = rasterize(
                    shapes=shapes,
                    out_shape=(block_height, block_width),
                    transform=block_transform,
                    fill=0,
                    dtype=rasterio.uint8
                )

                # Window is ((row_start, row_stop), (col_start, col_stop)).
                dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
                pbar.update(1)


def merge_and_rasterize_vectors_seven_categories(file_paths, output_vector_path, output_raster_path,
                                                 raster_resolution=10, block_size=2000, chunk_size=10000):
    """
    Merge seven land-cover vector layers and rasterize them into a single-band class raster.

    Each layer is tagged with a class value (forest=1, water=2, shrub=3,
    wetland=4, jianzhu=5, grass=6, bareland=7; cropland is excluded). The
    layers are concatenated, written to a GeoPackage, read back, cleaned and
    burned block by block into a uint8 GeoTIFF where 0 is background/nodata.
    Where polygons overlap, the layer that comes later in ``file_paths`` wins.

    Parameters:
    file_paths: list of exactly 7 vector file paths, in the order
        forest, water, shrub, wetland, jianzhu, grass, bareland.
    output_vector_path: path of the merged vector output (written with the GPKG driver).
    output_raster_path: path of the output GeoTIFF.
    raster_resolution: pixel size in units of the first layer's CRS (default 10).
    block_size: width and height, in pixels, of each processing block (default 2000).
    chunk_size: currently unused; kept so existing callers keep working.

    Returns:
    None. Any exception raised during processing is caught and reported on
    stdout as "发生错误: ..." rather than propagated.
    """
    try:
        # Validate the arguments before any expensive I/O happens.
        if len(file_paths) != 7:
            raise ValueError("请提供正好7个矢量文件路径")
        if raster_resolution <= 0 or block_size <= 0:
            raise ValueError(
                f"raster_resolution 和 block_size 必须为正数: "
                f"raster_resolution={raster_resolution}, block_size={block_size}"
            )
        block_size = int(block_size)

        # Create missing output folders now instead of failing after all layers are loaded.
        for out_path in (output_vector_path, output_raster_path):
            out_dir = os.path.dirname(out_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

        # Class mapping in the required order (cropland excluded): forest=1 ... bareland=7.
        categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
        category_values = {cat: i + 1 for i, cat in enumerate(categories)}

        gdfs, base_crs = _load_category_layers(file_paths, categories, category_values)

        print("正在合并所有图层...")
        merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=base_crs)
        print("合并后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))

        print(f"正在保存合并矢量结果到: {output_vector_path}")
        merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")

        # Read the file back so rasterization uses exactly what was persisted.
        print("正在重新加载合并后的文件...")
        merged_gdf = gpd.read_file(output_vector_path, engine="pyogrio")
        print("重新加载后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))

        valid_values = set(range(0, 8))  # 0=background, 1=forest, ..., 7=bareland
        merged_gdf = _clean_value_field(merged_gdf, valid_values)

        # Missing or empty geometries cannot be converted with mapping(); skip them.
        usable = merged_gdf.geometry.notna() & ~merged_gdf.geometry.is_empty
        if not usable.all():
            print(f"警告: 跳过 {int((~usable).sum())} 个空几何要素")
            merged_gdf = merged_gdf.loc[usable].reset_index(drop=True)
        if merged_gdf.empty:
            raise ValueError("没有可用于栅格化的有效几何要素")

        _rasterize_in_blocks(merged_gdf, output_raster_path, base_crs, raster_resolution, block_size)

        print("处理完成！")
        print(f"合并矢量保存为: {output_vector_path}")
        print(f"栅格保存为: {output_raster_path}")
        print(
            f"栅格中类别值: 0=背景, 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland")

    except Exception as e:
        # Best-effort script entry point: report the failure instead of raising.
        print(f"发生错误: {str(e)}")


# Example usage
if __name__ == "__main__":
    # The seven input layers, in the order the function expects:
    # forest, water, shrub, wetland, jianzhu, grass, bareland (cropland is excluded).
    layer_paths = [
        r"weifang/weifang/forest/forest31.shp",
        r"weifang/weifang/water/water1.shp",
        r"weifang/weifang/shrub/shrub.shp",
        r"weifang/weifang/wetland/wetland.shp",
        r"weifang/weifang/jianzhu/jianzhu1.shp",
        r"weifang/weifang/grass/grass.shp",
        r"weifang/weifang/bareland/bareland.shp"
    ]
    # Merged vector output (GeoPackage) and the rasterized result.
    merged_vector_path = r"weifang/weifang/weifanghebing/weifangdimiao.gpkg"
    class_raster_path = r"weifang/weifang/weifanghebing/weifangdimao.tif"

    # Run the merge + rasterization pipeline.
    merge_and_rasterize_vectors_seven_categories(
        layer_paths,
        merged_vector_path,
        class_raster_path,
        raster_resolution=10,
        block_size=2000,
        chunk_size=10000,
    )