189 lines
9.1 KiB
Python
189 lines
9.1 KiB
Python
|
import geopandas as gpd
|
|||
|
import pandas as pd
|
|||
|
import rasterio
|
|||
|
from rasterio.features import rasterize
|
|||
|
import numpy as np
|
|||
|
from shapely.geometry import mapping, box
|
|||
|
from tqdm import tqdm
|
|||
|
import os
|
|||
|
|
|||
|
def merge_and_rasterize_vectors_seven_categories(file_paths, output_vector_path, output_raster_path,
                                                 raster_resolution=10, block_size=2000, chunk_size=10000):
    """Merge seven land-cover vector layers and rasterize them into one GeoTIFF.

    Each layer is tagged with a category name and an integer code
    (forest=1, water=2, shrub=3, wetland=4, jianzhu=5, grass=6, bareland=7;
    cropland is intentionally not part of the set). Layers whose CRS differs
    from the first layer are reprojected to the first layer's CRS. The merged
    table is written to ``output_vector_path`` as a GeoPackage, read back,
    cleaned, and burned block by block into a single-band ``uint8`` raster in
    which 0 is both the background fill and the nodata value.

    Args:
        file_paths: Sequence of exactly seven vector file paths, in the order
            forest, water, shrub, wetland, jianzhu, grass, bareland.
        output_vector_path: Destination of the merged vector file. It is always
            written with the ``GPKG`` driver, so a ``.gpkg`` name is advisable.
        output_raster_path: Destination of the GeoTIFF.
        raster_resolution: Pixel size in CRS units (default 10). Must be > 0.
        block_size: Width and height, in pixels, of each rasterization block
            (default 2000). Must be >= 1.
        chunk_size: Currently unused; accepted only so existing callers that
            pass it keep working.

    Returns:
        None. Any exception raised along the way (including the validation
        errors above) is caught, reported on stdout as ``发生错误: <message>``
        and not re-raised.
    """
    try:
        categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
        # forest=1, water=2, ..., bareland=7; 0 stays free for the background.
        category_values = {cat: i + 1 for i, cat in enumerate(categories)}

        # Validate cheap arguments first so a bad call fails before any file
        # is read or written.
        if len(file_paths) != len(categories):
            raise ValueError("请提供正好7个矢量文件路径")
        if raster_resolution <= 0:
            raise ValueError(f"raster_resolution 必须为正数: {raster_resolution}")
        if block_size < 1:
            raise ValueError(f"block_size 必须为正整数: {block_size}")

        gdfs, base_crs = _read_category_layers(file_paths, categories, category_values)

        print("正在合并所有图层...")
        merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=base_crs)
        print("合并后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))

        print(f"正在保存合并矢量结果到: {output_vector_path}")
        merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")

        # Round-trip through the file so the raster is built from exactly what
        # was stored, then re-check the 'value' field.
        print("正在重新加载合并后的文件...")
        merged_gdf = gpd.read_file(output_vector_path, engine="pyogrio")
        print("重新加载后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))

        merged_gdf = _sanitize_value_column(merged_gdf, max_value=len(categories))

        # Null geometries would crash mapping(); empty ones burn nothing.
        # Drop both for rasterization only (the saved vector file keeps them).
        has_geometry = merged_gdf.geometry.notna() & ~merged_gdf.geometry.is_empty
        if not has_geometry.all():
            print(f"警告: 跳过 {int((~has_geometry).sum())} 个空几何要素")
            merged_gdf = merged_gdf[has_geometry]
        if merged_gdf.empty:
            raise ValueError("没有可用于栅格化的有效几何要素")

        _rasterize_in_blocks(merged_gdf, output_raster_path, base_crs, raster_resolution, block_size)

        print("处理完成!")
        print(f"合并矢量保存为: {output_vector_path}")
        print(f"栅格保存为: {output_raster_path}")
        print(
            f"栅格中类别值: 0=背景, 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland")

    except Exception as e:
        # Deliberate top-level boundary: report the failure and return None
        # instead of propagating, as callers of this function expect.
        print(f"发生错误: {e}")


def _read_category_layers(file_paths, categories, category_values):
    """Read every layer, tag it with its category, and align it to one CRS.

    Returns a ``(gdfs, base_crs)`` tuple: the list of tagged GeoDataFrames in
    input order and the CRS of the first layer, which all later layers are
    reprojected to when theirs differs.
    """
    gdfs = []
    base_crs = None
    layers = tqdm(zip(file_paths, categories), total=len(categories), desc="读取矢量文件进度")
    for i, (path, category) in enumerate(layers, 1):
        print(f"正在处理第{i}个矢量文件: {path}")
        gdf = gpd.read_file(path, engine="pyogrio")
        gdf['category'] = category
        gdf['value'] = category_values[category]  # integer code burned into the raster
        gdf['value'] = gdf['value'].astype(int)
        if 'Shape_Area' in gdf.columns:
            # Cap very large areas; the original author's note says this avoids
            # an overflow (presumably when the field is written — unverified).
            gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)

        # The first layer defines the reference CRS for all the others.
        if i == 1:
            base_crs = gdf.crs
        elif gdf.crs != base_crs:
            print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
            gdf = gdf.to_crs(base_crs)

        print(f"第{i}个图层 value 字段分布:", gdf['value'].value_counts(dropna=False))
        gdfs.append(gdf)
    return gdfs, base_crs


def _sanitize_value_column(merged_gdf, max_value):
    """Force the 'value' column to integers in ``0..max_value``.

    Missing values and values outside the range are replaced by 0 (the
    background code). Returns the same GeoDataFrame, modified in place.
    """
    print("正在清洗 'value' 字段数据...")
    if merged_gdf['value'].isna().any():
        print("警告: 'value' 字段包含 nan 值,将替换为 0")
        merged_gdf['value'] = merged_gdf['value'].fillna(0)

    # Go through float first so values such as "3.0" or 3.0 convert cleanly.
    merged_gdf['value'] = merged_gdf['value'].astype(float).astype(int)

    valid_values = set(range(0, max_value + 1))  # 0 = background, 1..max_value = categories
    is_valid = merged_gdf['value'].isin(valid_values)
    if not is_valid.all():
        print(f"警告: 'value' 字段包含无效值: {set(merged_gdf['value'].unique()) - valid_values},将替换为 0")
        merged_gdf['value'] = merged_gdf['value'].where(is_valid, 0)

    print("清洗后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
    return merged_gdf


def _rasterize_in_blocks(merged_gdf, output_raster_path, crs, raster_resolution, block_size):
    """Burn the 'value' column of ``merged_gdf`` into a GeoTIFF, block by block.

    The grid is anchored at the top-left corner of the data's bounding box and
    uses square pixels of exactly ``raster_resolution``; the pixel count is
    rounded up so the whole extent is covered. Only one block of at most
    ``block_size`` x ``block_size`` pixels is held in memory at a time.

    Raises:
        ValueError: If the computed raster would have no rows or no columns.
    """
    print("正在构建空间索引以加速栅格化...")
    sindex = merged_gdf.sindex

    minx, miny, maxx, maxy = merged_gdf.total_bounds
    # Round up and anchor with from_origin so the pixel size stored in the
    # file is exactly raster_resolution — the same size every block below is
    # rasterized with. (Truncating and stretching with from_bounds would make
    # the file's georeferencing disagree with the burned pixels.)
    width = int(np.ceil((maxx - minx) / raster_resolution))
    height = int(np.ceil((maxy - miny) / raster_resolution))
    if width <= 0 or height <= 0:
        raise ValueError(f"栅格尺寸无效: 宽度={width}, 高度={height},检查范围或分辨率")

    transform = rasterio.transform.from_origin(minx, maxy, raster_resolution, raster_resolution)

    blocks_down = (height + block_size - 1) // block_size
    blocks_across = (width + block_size - 1) // block_size
    total_blocks = blocks_down * blocks_across
    print(f"总分块数: {total_blocks}")

    with rasterio.open(
            output_raster_path,
            'w',
            driver='GTiff',
            height=height,
            width=width,
            count=1,
            dtype=rasterio.uint8,
            crs=crs,
            transform=transform,
            nodata=0
    ) as dst:
        with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
            for y in range(0, height, block_size):
                block_height = min(block_size, height - y)
                # Rows are counted from the top edge, so y grows downward.
                block_maxy = maxy - y * raster_resolution
                block_miny = block_maxy - block_height * raster_resolution

                for x in range(0, width, block_size):
                    block_width = min(block_size, width - x)
                    block_minx = minx + x * raster_resolution
                    block_maxx = block_minx + block_width * raster_resolution

                    # Bounding-box candidates for this block. Sort them so
                    # features are always burned in merged-table order and
                    # overlapping categories resolve the same way in every
                    # block (the index does not guarantee an order).
                    hits = sindex.intersection((block_minx, block_miny, block_maxx, block_maxy))
                    block_gdf = merged_gdf.iloc[sorted(hits)]

                    if block_gdf.empty:
                        # Nothing to burn; the block is left unwritten.
                        pbar.update(1)
                        continue

                    print(f"分块 (x={x}, y={y}) value 字段分布:", block_gdf['value'].value_counts(dropna=False))

                    block_transform = rasterio.transform.from_origin(
                        block_minx, block_maxy, raster_resolution, raster_resolution
                    )
                    shapes = [(mapping(geom), value) for geom, value in zip(block_gdf.geometry, block_gdf['value'])]
                    block_raster = rasterize(
                        shapes=shapes,
                        out_shape=(block_height, block_width),
                        transform=block_transform,
                        fill=0,
                        dtype=rasterio.uint8
                    )

                    # Window is ((row_start, row_stop), (col_start, col_stop)).
                    dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
                    pbar.update(1)
|
|||
|
|
|||
|
|
|||
|
# 使用示例
|
|||
|
if __name__ == "__main__":
    # Example run. The seven input layers must be listed in the order the
    # function expects: forest, water, shrub, wetland, jianzhu, grass,
    # bareland (cropland is deliberately left out).
    input_files = [
        r"weifang/weifang/forest/forest31.shp",
        r"weifang/weifang/water/water1.shp",
        r"weifang/weifang/shrub/shrub.shp",
        r"weifang/weifang/wetland/wetland.shp",
        r"weifang/weifang/jianzhu/jianzhu1.shp",
        r"weifang/weifang/grass/grass.shp",
        r"weifang/weifang/bareland/bareland.shp",
    ]

    # Merged vector output, written as a GeoPackage.
    # NOTE(review): this name is spelled "dimiao" while the raster below is
    # "dimao" — confirm which spelling is intended.
    output_vector_file = r"weifang/weifang/weifanghebing/weifangdimiao.gpkg"
    output_raster_file = r"weifang/weifang/weifanghebing/weifangdimao.tif"

    # Merge the layers and rasterize the result.
    merge_and_rasterize_vectors_seven_categories(
        file_paths=input_files,
        output_vector_path=output_vector_file,
        output_raster_path=output_raster_file,
        raster_resolution=10,
        block_size=2000,
        chunk_size=10000,
    )
|