GreenTransPowerCalculate/deeplabv3sdRenewable/tools/山东省地貌识别tools/潜力评估阶段/合并矢量转栅格但保存分类(7).py

229 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
import os
from tqdm import tqdm # 导入 tqdm 用于进度条
def merge_vectors_seven_categories(file_paths, output_vector_path):
    """Merge seven land-cover vector layers into one GeoPackage file.

    Every input layer is tagged with a ``category`` name and an integer
    ``value`` (forest=1, water=2, shrub=3, wetland=4, jianzhu=5, grass=6,
    bareland=7; cropland is deliberately not part of the set). Layers whose
    CRS differs from the first layer's CRS are reprojected to it, then all
    layers are concatenated and written with the GPKG driver.

    Args:
        file_paths: Exactly seven vector file paths, in the order
            forest, water, shrub, wetland, jianzhu, grass, bareland.
        output_vector_path: Destination path. If its extension is not
            ``.gpkg`` the extension is replaced by ``.gpkg``.

    Returns:
        The path that was actually written, or ``None`` when any step
        failed. Errors are printed rather than raised, so callers must
        check the return value.
    """
    try:
        # Validate the inputs up front so no layer is read needlessly.
        if len(file_paths) != 7:
            raise ValueError("请提供正好7个矢量文件路径")
        for path in file_paths:
            if not os.path.exists(path):
                raise FileNotFoundError(f"文件不存在: {path}")

        # Category order defines the class code: forest=1 ... bareland=7.
        categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
        category_values = {cat: i + 1 for i, cat in enumerate(categories)}

        gdfs = []
        for i, (path, category) in enumerate(zip(file_paths, categories), 1):
            print(f"正在读取第{i}个矢量文件: {path}")
            gdf = gpd.read_file(path, engine="pyogrio")
            gdf['category'] = category
            gdf['value'] = category_values[category]
            if 'Shape_Area' in gdf.columns:
                # Cap very large areas; presumably this keeps the attribute
                # within what the output field can store - TODO confirm.
                gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
            gdfs.append(gdf)

        # Bring every layer into the CRS of the first one. ``i`` is the
        # 1-based layer number, hence the ``i - 1`` list index.
        base_crs = gdfs[0].crs
        for i, gdf in enumerate(gdfs[1:], 2):
            if gdf.crs != base_crs:
                print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
                gdfs[i - 1] = gdf.to_crs(base_crs)

        print("正在合并图层...")
        merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
        merged_gdf = merged_gdf.set_geometry('geometry')

        # Force a .gpkg extension. Only the real file extension is swapped,
        # so paths such as "out.geojson" or "out" are renamed too and a
        # ".shp" substring inside a directory name is left alone.
        root, ext = os.path.splitext(output_vector_path)
        if ext.lower() != '.gpkg':
            output_vector_path = root + '.gpkg'
            print("输出格式已更改为GeoPackage以支持大文件")

        print(f"正在保存合并矢量结果到: {output_vector_path}")
        merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
        print("矢量合并完成!")
        print(f"合并矢量保存为: {output_vector_path}")
        return output_vector_path
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with
        # None so the calling script can skip the rasterization step.
        print(f"发生错误: {str(e)}")
        return None
def _iter_block_windows(total_height, total_width, block_size):
    """Yield ``(row_off, col_off, height, width)`` for every tile of a raster.

    Tiles are visited row by row, left to right; tiles on the right and
    bottom edges are clipped to the raster size.
    """
    for row_off in range(0, total_height, block_size):
        tile_height = min(block_size, total_height - row_off)
        for col_off in range(0, total_width, block_size):
            yield row_off, col_off, tile_height, min(block_size, total_width - col_off)


def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
    """Rasterize the merged land-cover layer tile by tile into a GeoTIFF.

    The layer's ``value`` column (class codes 1-7) is burned into a
    single-band uint8 raster. Pixels not touched by any feature get the
    background code 8; 255 is declared as the nodata value in the file
    metadata but is never written. Features are burned in ascending
    ``value`` order, so where features overlap the larger class code wins.
    Per-tile and overall pixel counts are printed.

    Args:
        input_vector_path: Path of the merged vector file; it must contain
            a ``value`` column.
        output_raster_path: Path of the GeoTIFF to create.
        raster_resolution: Pixel size in the units of the layer's CRS
            (the original author documents the default of 30 as metres).
        block_size: Tile width and height in pixels (default 5000).

    Returns:
        None. Errors are printed rather than raised.
    """
    try:
        print(f"正在读取合并矢量文件: {input_vector_path}")
        merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")

        # The class codes must be present and inside 1..7; rows outside
        # that range (including missing values) are dropped with a warning.
        if 'value' not in merged_gdf.columns:
            raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
        print("合并矢量文件中的 value 字段分布:")
        print(merged_gdf['value'].value_counts(dropna=False))
        if not merged_gdf['value'].between(1, 7).all():
            print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
            merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]

        if merged_gdf.empty:
            raise ValueError("合并后的矢量数据为空,请检查输入数据是否有效")

        # Shapes are burned in iteration order and later shapes overwrite
        # earlier ones, so ascending order lets larger codes take priority.
        print("正在按 'value' 字段排序以确保优先级...")
        merged_gdf = merged_gdf.sort_values(by='value', ascending=True)

        # Round the size UP so the partial last row/column is kept, and
        # anchor the grid at the top-left corner with an exact pixel size.
        # Every tile transform below uses the same origin and pixel size,
        # so tiles line up exactly with the georeferencing of the file.
        bounds = merged_gdf.total_bounds  # [minx, miny, maxx, maxy]
        total_width = int(np.ceil((bounds[2] - bounds[0]) / raster_resolution))
        total_height = int(np.ceil((bounds[3] - bounds[1]) / raster_resolution))
        if total_width <= 0 or total_height <= 0:
            raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
        print(f"栅格范围: minx={bounds[0]}, miny={bounds[1]}, maxx={bounds[2]}, maxy={bounds[3]}")
        print(f"栅格尺寸: 宽度={total_width}, 高度={total_height}")

        transform = rasterio.transform.from_origin(
            bounds[0], bounds[3], raster_resolution, raster_resolution
        )

        # Number of tiles, used as the progress-bar total.
        total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
        print(f"总分块数: {total_blocks}")

        # Histogram of all written pixel values (index = pixel value).
        # Accumulating it while writing avoids loading the finished raster
        # into memory just to count pixels.
        pixel_totals = np.zeros(256, dtype=np.int64)

        with rasterio.open(
            output_raster_path,
            'w',
            driver='GTiff',
            height=total_height,
            width=total_width,
            count=1,
            dtype=rasterio.uint8,
            crs=merged_gdf.crs,
            transform=transform,
            nodata=255  # 255 marks nodata, distinct from the background code 8
        ) as dst:
            with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
                for row_off, col_off, block_height, block_width in _iter_block_windows(
                        total_height, total_width, block_size):
                    # Geographic extent of this tile on the output grid.
                    block_minx = bounds[0] + col_off * raster_resolution
                    block_maxy = bounds[3] - row_off * raster_resolution
                    block_maxx = block_minx + block_width * raster_resolution
                    block_miny = block_maxy - block_height * raster_resolution

                    # Keep only the features that intersect the tile.
                    block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
                    block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()

                    # Start from an all-background tile (code 8).
                    block_raster = np.full((block_height, block_width), 8, dtype=np.uint8)

                    if not block_gdf.empty:
                        print(f"分块 (x={col_off}, y={row_off}) 中的 value 字段分布:")
                        print(block_gdf['value'].value_counts(dropna=False))

                        block_transform = rasterio.transform.from_origin(
                            block_minx, block_maxy, raster_resolution, raster_resolution
                        )
                        shapes = ((mapping(geom), value) for geom, value in
                                  zip(block_gdf.geometry, block_gdf['value']))
                        block_raster = rasterize(
                            shapes=shapes,
                            out=block_raster,  # burn into the pre-filled tile
                            transform=block_transform,
                            fill=8,  # background code for uncovered pixels
                            dtype=rasterio.uint8,
                            all_touched=True  # count every pixel a shape touches
                        )

                    # Pixel counts of this tile for codes 1..7 and background.
                    block_counts = np.bincount(block_raster.ravel(), minlength=256)
                    pixel_totals += block_counts
                    print(f"分块 (x={col_off}, y={row_off}) 中像素值统计:")
                    for val in range(1, 8):
                        count = block_counts[val]
                        print(f"Value={val}: {count} 像素")
                    background_count = block_counts[8]
                    print(f"Value=8 (background): {background_count} 像素")

                    # Write the tile straight to disk; the window is given
                    # as ((row_start, row_stop), (col_start, col_stop)).
                    dst.write(
                        block_raster, 1,
                        window=((row_off, row_off + block_height), (col_off, col_off + block_width))
                    )
                    print(f"已完成分块栅格化: x={col_off}, y={row_off}, 宽度={block_width}, 高度={block_height}")
                    pbar.update(1)

        # Overall pixel distribution, taken from the accumulated histogram.
        print("最终栅格文件中的像素值统计:")
        for val in range(1, 9):
            count = pixel_totals[val]
            print(f"Value={val}: {count} 像素")
        nodata_count = pixel_totals[255]
        print(f"Nodata (255): {nodata_count} 像素")

        print("栅格化完成!")
        print(f"栅格保存为: {output_raster_path}")
        print(f"栅格中类别值: 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland, 8=background")
    except Exception as e:
        # Best-effort: report the failure instead of raising to the caller.
        print(f"发生错误: {str(e)}")
# Example usage
if __name__ == "__main__":
    # Destinations for the merged vector layer and the classified raster.
    merged_gpkg = r"sddimaoshp/hebingtif/shandongdimiao_10_1m.gpkg"
    classified_tif = r"sddimaoshp/hebingtif/sddimao_10_1m.tif"

    # Source layers; the order must be forest, water, shrub, wetland,
    # jianzhu, grass, bareland because the class code follows the position.
    source_layers = [
        r"sddimaoshp/forest/sdforest.shp",
        r"sddimaoshp/water/shandongsuiyu3.shp",
        r"sddimaoshp/shrub/sdshrub.shp",
        r"sddimaoshp/wetland/sdwetland.shp",
        r"sddimaoshp/jianzhu/shandongjianzhu3.shp",
        r"sddimaoshp/grass/sdgrass.shp",
        r"sddimaoshp/bareland/sdbareland.shp",
    ]

    # Step 1: merge the vector layers.
    merged_path = merge_vectors_seven_categories(source_layers, merged_gpkg)

    # Step 2: rasterize tile by tile, but only when the merge succeeded
    # (a failed merge returns None).
    if merged_path:
        rasterize_vector_by_blocks(
            input_vector_path=merged_path,
            output_raster_path=classified_tif,
            raster_resolution=10,
            block_size=5000,
        )