# GreenTransPowerCalculate/deeplabv3sdRenewable/tools/山东省地貌识别tools/潜力评估阶段/合并矢量转栅格但保存分类（7）.py

import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
import os
from tqdm import tqdm  # 导入 tqdm 用于进度条


def merge_vectors_seven_categories(file_paths, output_vector_path):
    """
    Merge seven land-cover vector layers into a single GeoPackage layer.

    Every input layer gets two extra columns: ``category`` (its class name)
    and ``value`` (its integer class code: forest=1, water=2, shrub=3,
    wetland=4, jianzhu=5, grass=6, bareland=7; cropland is intentionally
    left out). Layers whose CRS differs from the first layer's CRS are
    reprojected to it before all layers are concatenated.

    Args:
        file_paths: Sequence of exactly seven vector file paths, in the order
            forest, water, shrub, wetland, jianzhu, grass, bareland.
        output_vector_path: Destination path. If its extension is not
            ``.gpkg`` (case-insensitive) the extension is replaced by
            ``.gpkg``, because the result is always written with the GPKG
            driver. Missing parent directories are created.

    Returns:
        The path that was actually written, or ``None`` when any step failed.
        Errors are printed, not raised, so callers must check the result.
    """
    try:
        # Exactly one file per category is required.
        if len(file_paths) != 7:
            raise ValueError("请提供正好7个矢量文件路径")

        # Fail fast on missing inputs before any layer is read.
        for path in file_paths:
            if not os.path.exists(path):
                raise FileNotFoundError(f"文件不存在: {path}")

        # The seven categories (cropland removed); class codes are 1-based
        # positions in this list: forest=1, water=2, ..., bareland=7.
        categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
        category_values = {cat: i + 1 for i, cat in enumerate(categories)}

        gdfs = []
        for i, (path, category) in enumerate(zip(file_paths, categories), 1):
            print(f"正在读取第{i}个矢量文件: {path}")
            gdf = gpd.read_file(path, engine="pyogrio")
            gdf['category'] = category
            gdf['value'] = category_values[category]
            if 'Shape_Area' in gdf.columns:
                # Cap very large areas at 1e9 (presumably to stay within the
                # output field's numeric range -- TODO confirm).
                gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
            gdfs.append(gdf)

        # Bring every layer into the CRS of the first layer.
        base_crs = gdfs[0].crs
        for i, gdf in enumerate(gdfs[1:], 2):
            if gdf.crs != base_crs:
                print(f"第{i}个图层坐标系不同，正在转换为第一个图层的坐标系...")
                gdfs[i - 1] = gdf.to_crs(base_crs)

        print("正在合并图层...")
        merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
        merged_gdf = merged_gdf.set_geometry('geometry')

        # Normalise the extension with splitext instead of str.replace:
        # replace() would also rewrite ".shp" inside directory names and would
        # leave any other extension untouched although the GPKG driver is used.
        root, ext = os.path.splitext(output_vector_path)
        if ext.lower() != '.gpkg':
            output_vector_path = root + '.gpkg'
            print("输出格式已更改为GeoPackage以支持大文件")

        # Make sure the target folder exists so the write cannot fail on a
        # missing directory after all layers have already been read.
        output_dir = os.path.dirname(output_vector_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        print(f"正在保存合并矢量结果到: {output_vector_path}")
        merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")

        print("矢量合并完成！")
        print(f"合并矢量保存为: {output_vector_path}")
        return output_vector_path

    except Exception as e:
        # Best-effort script behaviour: report the problem and signal failure
        # through the return value instead of propagating the exception.
        print(f"发生错误: {str(e)}")
        return None


def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
    """
    Rasterize a merged vector layer block by block into a single-band GeoTIFF.

    Pixels take the ``value`` (1-7) of the feature covering them; pixels not
    covered by any feature are set to the background value 8. The file's
    nodata value is 255 so that it stays distinct from the background class.
    Features are burned in ascending ``value`` order, so where features
    overlap the larger ``value`` wins. Pixel counts are printed per block and
    for the whole raster.

    The raster grid is anchored at the top-left corner of the layer's total
    bounds and uses exactly ``raster_resolution`` as pixel size; width and
    height are rounded up so the whole extent is covered.

    Args:
        input_vector_path: Path of the merged vector file (must contain a
            ``value`` column).
        output_raster_path: Path of the GeoTIFF to create.
        raster_resolution: Pixel size in CRS units (default 30). Must be > 0.
        block_size: Width and height of one processing block in pixels
            (default 5000). Must be > 0.

    Returns:
        ``output_raster_path`` on success, ``None`` when any step failed.
        Errors are printed, not raised.
    """
    try:
        # Validate cheap numeric parameters before the expensive vector read.
        if raster_resolution <= 0:
            raise ValueError(f"栅格分辨率必须为正数: {raster_resolution}")
        if block_size <= 0:
            raise ValueError(f"分块大小必须为正数: {block_size}")

        # Read the merged vector layer.
        print(f"正在读取合并矢量文件: {input_vector_path}")
        merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")

        # The class code column is mandatory.
        if 'value' not in merged_gdf.columns:
            raise ValueError("矢量文件中缺少 'value' 字段，请确保文件包含正确的分类值")
        print("合并矢量文件中的 value 字段分布:")
        print(merged_gdf['value'].value_counts(dropna=False))

        # Drop features whose class code is outside 1-7 (NaN included).
        valid_mask = merged_gdf['value'].between(1, 7)
        if not valid_mask.all():
            print("警告: 'value' 字段包含无效值（应在 1-7 之间），可能导致数据丢失")
            merged_gdf = merged_gdf[valid_mask]

        if merged_gdf.empty:
            raise ValueError("合并后的矢量数据为空，请检查输入数据是否有效")

        # Ascending order: later (larger) values are burned last and therefore
        # overwrite smaller values where features overlap.
        print("正在按 'value' 字段排序以确保优先级...")
        merged_gdf = merged_gdf.sort_values(by='value', ascending=True)

        # Overall extent and raster size. Round up so the partial pixel strip
        # at the right/bottom edge is kept instead of being truncated away.
        bounds = merged_gdf.total_bounds  # [minx, miny, maxx, maxy]
        minx, miny, maxx, maxy = bounds
        total_width = int(np.ceil((maxx - minx) / raster_resolution))
        total_height = int(np.ceil((maxy - miny) / raster_resolution))

        if total_width <= 0 or total_height <= 0:
            raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")

        print(f"栅格范围: minx={bounds[0]}, miny={bounds[1]}, maxx={bounds[2]}, maxy={bounds[3]}")
        print(f"栅格尺寸: 宽度={total_width}, 高度={total_height}")

        # File transform with the exact requested pixel size, anchored at the
        # top-left corner. The per-block transforms below use the same anchor
        # and pixel size, so block pixels line up with the file's grid.
        transform = rasterio.transform.from_origin(minx, maxy, raster_resolution, raster_resolution)

        # Number of blocks (ceil division in both directions) for the progress bar.
        blocks_down = (total_height + block_size - 1) // block_size
        blocks_across = (total_width + block_size - 1) // block_size
        total_blocks = blocks_down * blocks_across
        print(f"总分块数: {total_blocks}")

        # Running histogram over all written pixels; this replaces re-reading
        # the whole raster into memory for the final statistics.
        pixel_totals = np.zeros(256, dtype=np.int64)

        # Create the output file and write it block by block to bound memory use.
        with rasterio.open(
                output_raster_path,
                'w',
                driver='GTiff',
                height=total_height,
                width=total_width,
                count=1,
                dtype=rasterio.uint8,
                crs=merged_gdf.crs,
                transform=transform,
                nodata=255  # 255 = nodata, kept distinct from background value 8
        ) as dst:
            with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
                for y in range(0, total_height, block_size):
                    for x in range(0, total_width, block_size):
                        block_height = min(block_size, total_height - y)
                        block_width = min(block_size, total_width - x)

                        # Geographic extent of this block (row 0 is the top edge).
                        block_minx = minx + x * raster_resolution
                        block_maxy = maxy - y * raster_resolution
                        block_maxx = block_minx + block_width * raster_resolution
                        block_miny = block_maxy - block_height * raster_resolution

                        # Features intersecting the block; row order (ascending
                        # value) is preserved by boolean indexing.
                        block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
                        block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()

                        # Start from an all-background block.
                        block_raster = np.full((block_height, block_width), 8, dtype=np.uint8)

                        if not block_gdf.empty:
                            print(f"分块 (x={x}, y={y}) 中的 value 字段分布:")
                            print(block_gdf['value'].value_counts(dropna=False))

                            block_transform = rasterio.transform.from_origin(
                                block_minx, block_maxy, raster_resolution, raster_resolution
                            )
                            shapes = ((mapping(geom), value) for geom, value in
                                      zip(block_gdf.geometry, block_gdf['value']))
                            block_raster = rasterize(
                                shapes=shapes,
                                out=block_raster,  # burn into the pre-filled array
                                transform=block_transform,
                                fill=8,  # background value for uncovered pixels
                                dtype=rasterio.uint8,
                                all_touched=True  # count every pixel a geometry touches
                            )

                        # Per-block pixel statistics for values 1-7 and background 8.
                        block_counts = np.bincount(block_raster.ravel(), minlength=256)
                        pixel_totals += block_counts
                        print(f"分块 (x={x}, y={y}) 中像素值统计:")
                        for val in range(1, 8):
                            print(f"Value={val}: {block_counts[val]} 像素")
                        print(f"Value=8 (background): {block_counts[8]} 像素")

                        # Write this block into its window ((rows), (cols)).
                        dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
                        print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
                        pbar.update(1)

        # Final statistics from the accumulated histogram (values 1-8 and nodata).
        print("最终栅格文件中的像素值统计:")
        for val in range(1, 9):
            print(f"Value={val}: {pixel_totals[val]} 像素")
        print(f"Nodata (255): {pixel_totals[255]} 像素")

        print("栅格化完成！")
        print(f"栅格保存为: {output_raster_path}")
        print(f"栅格中类别值: 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland, 8=background")
        return output_raster_path

    except Exception as e:
        # Best-effort script behaviour: report the problem and signal failure
        # through the return value instead of propagating the exception.
        print(f"发生错误: {str(e)}")
        return None


# Example usage
if __name__ == "__main__":
    # Output locations for the merged vector layer and the final raster.
    merged_vector_target = r"sddimaoshp/hebingtif/shandongdimiao_10_1m.gpkg"
    raster_target = r"sddimaoshp/hebingtif/sddimao_10_1m.tif"

    # Input layers, listed in the order the merge step expects:
    # forest, water, shrub, wetland, jianzhu, grass, bareland.
    layer_paths = [
        r"sddimaoshp/forest/sdforest.shp",
        r"sddimaoshp/water/shandongsuiyu3.shp",
        r"sddimaoshp/shrub/sdshrub.shp",
        r"sddimaoshp/wetland/sdwetland.shp",
        r"sddimaoshp/jianzhu/shandongjianzhu3.shp",
        r"sddimaoshp/grass/sdgrass.shp",
        r"sddimaoshp/bareland/sdbareland.shp",
    ]

    # Step 1: merge the vector layers.
    merged_path = merge_vectors_seven_categories(layer_paths, merged_vector_target)

    # Step 2: rasterize block by block, but only when the merge succeeded.
    if merged_path:
        rasterize_vector_by_blocks(
            merged_path,
            raster_target,
            raster_resolution=10,
            block_size=5000,
        )