This commit is contained in:
linnce 2025-04-27 09:58:17 +08:00
parent 0e9d2ef236
commit adca4f89af
9 changed files with 1413 additions and 0 deletions

View File

@ -0,0 +1,189 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
from tqdm import tqdm
import os
def merge_and_rasterize_vectors_seven_categories(file_paths, output_vector_path, output_raster_path,
raster_resolution=10, block_size=2000, chunk_size=10000):
"""
合并7个地貌类型矢量图层转为栅格并显示7个类别保留0值作为背景优化合并策略去掉cropland
参数:
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
output_vector_path: 输出合并后的矢量文件路径建议使用.gpkg格式
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认10米
block_size: 每个分块的宽度和高度像素单位默认2000
chunk_size: 每次处理矢量数据的块大小默认10000条记录
"""
try:
# 检查输入文件数量
if len(file_paths) != 7:
raise ValueError("请提供正好7个矢量文件路径")
# 定义类别映射按照指定顺序去掉cropland
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
# 逐块读取矢量文件,合并到内存中
gdfs = []
base_crs = None
for i, (path, category) in enumerate(tqdm(zip(file_paths, categories), total=7, desc="读取矢量文件进度"), 1):
print(f"正在处理第{i}个矢量文件: {path}")
gdf = gpd.read_file(path, engine="pyogrio")
gdf['category'] = category # 添加类别字段
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
gdf['value'] = gdf['value'].astype(int) # 确保 value 为整数类型
if 'Shape_Area' in gdf.columns:
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9) # 限制最大值,避免溢出
# 统一坐标系(以第一个图层为基准)
if i == 1:
base_crs = gdf.crs
else:
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdf = gdf.to_crs(base_crs)
print(f"{i}个图层 value 字段分布:", gdf['value'].value_counts(dropna=False))
gdfs.append(gdf)
# 在内存中合并所有 gdf
print("正在合并所有图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=base_crs)
print("合并后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
# 保存合并后的矢量文件
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
# 重新加载合并后的文件,检查 value 字段
print("正在重新加载合并后的文件...")
merged_gdf = gpd.read_file(output_vector_path, engine="pyogrio")
print("重新加载后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
# 数据清洗:处理 value 字段
print("正在清洗 'value' 字段数据...")
# 处理 nan 值
if merged_gdf['value'].isna().any():
print("警告: 'value' 字段包含 nan 值,将替换为 0")
merged_gdf['value'] = merged_gdf['value'].fillna(0)
# 转换为整数类型
merged_gdf['value'] = merged_gdf['value'].astype(float).astype(int)
# 确保值在 0-7 范围内
valid_values = set(range(0, 8)) # 0=背景, 1=forest, ..., 7=bareland
if not merged_gdf['value'].isin(valid_values).all():
print(f"警告: 'value' 字段包含无效值: {set(merged_gdf['value'].unique()) - valid_values},将替换为 0")
merged_gdf['value'] = merged_gdf['value'].apply(lambda x: x if x in valid_values else 0)
print("清洗后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
# 构建空间索引以加速后续栅格化
print("正在构建空间索引以加速栅格化...")
sindex = merged_gdf.sindex
# 计算栅格化的范围
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
width = int((bounds[2] - bounds[0]) / raster_resolution)
height = int((bounds[3] - bounds[1]) / raster_resolution)
# 检查栅格尺寸是否有效
if width <= 0 or height <= 0:
raise ValueError(f"栅格尺寸无效: 宽度={width}, 高度={height},检查范围或分辨率")
# 创建栅格化变换
transform = rasterio.transform.from_bounds(
bounds[0], bounds[1], bounds[2], bounds[3], width, height
)
# 计算总分块数以设置进度条
total_blocks = ((height + block_size - 1) // block_size) * ((width + block_size - 1) // block_size)
print(f"总分块数: {total_blocks}")
# 创建并写入栅格文件(逐块写入,避免一次性占用大量内存)
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=height,
width=width,
count=1,
dtype=rasterio.uint8,
crs=base_crs,
transform=transform,
nodata=0
) as dst:
# 分块处理并添加进度条
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
for y in range(0, height, block_size):
for x in range(0, width, block_size):
block_height = min(block_size, height - y)
block_width = min(block_size, width - x)
# 计算当前分块的地理范围
block_minx = bounds[0] + x * raster_resolution
block_maxy = bounds[3] - y * raster_resolution
block_maxx = block_minx + block_width * raster_resolution
block_miny = block_maxy - block_height * raster_resolution
# 创建分块边界框并使用空间索引查询
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
indices = sindex.intersection(block_bounds.bounds)
block_gdf = merged_gdf.iloc[list(indices)].copy()
if block_gdf.empty:
pbar.update(1)
continue
# 打印分块中的 value 分布
print(f"分块 (x={x}, y={y}) value 字段分布:", block_gdf['value'].value_counts(dropna=False))
# 分块栅格化
block_transform = rasterio.transform.from_bounds(
block_minx, block_miny, block_maxx, block_maxy, block_width, block_height
)
shapes = [(mapping(geom), value) for geom, value in zip(block_gdf.geometry, block_gdf['value'])]
block_raster = rasterize(
shapes=shapes,
out_shape=(block_height, block_width),
transform=block_transform,
fill=0,
dtype=rasterio.uint8
)
# 直接写入当前分块到文件中
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
pbar.update(1)
print("处理完成!")
print(f"合并矢量保存为: {output_vector_path}")
print(f"栅格保存为: {output_raster_path}")
print(
f"栅格中类别值: 0=背景, 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
# 输入7个矢量文件路径按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland去掉cropland
input_files = [
r"weifang/weifang/forest/forest31.shp",
r"weifang/weifang/water/water1.shp",
r"weifang/weifang/shrub/shrub.shp",
r"weifang/weifang/wetland/wetland.shp",
r"weifang/weifang/jianzhu/jianzhu1.shp",
r"weifang/weifang/grass/grass.shp",
r"weifang/weifang/bareland/bareland.shp"
]
output_vector_file = r"weifang/weifang/weifanghebing/weifangdimiao.gpkg" # 改为.gpkg
output_raster_file = r"weifang/weifang/weifanghebing/weifangdimao.tif"
# 执行合并和栅格化
merge_and_rasterize_vectors_seven_categories(input_files, output_vector_file, output_raster_file,
raster_resolution=10, block_size=2000, chunk_size=10000)

View File

@ -0,0 +1,229 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
import os
from tqdm import tqdm # 导入 tqdm 用于进度条
def merge_vectors_seven_categories(file_paths, output_vector_path):
"""
合并7个地貌类型矢量图层并保存为GeoPackage格式去掉cropland
参数:
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
output_vector_path: 输出合并后的矢量文件路径使用.gpkg格式
"""
try:
# 检查输入文件数量是否为7
if len(file_paths) != 7:
raise ValueError("请提供正好7个矢量文件路径")
for path in file_paths:
if not os.path.exists(path):
raise FileNotFoundError(f"文件不存在: {path}")
# 定义7个类别去掉cropland
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
gdfs = []
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
print(f"正在读取第{i}个矢量文件: {path}")
gdf = gpd.read_file(path, engine="pyogrio")
gdf['category'] = category
gdf['value'] = category_values[category]
if 'Shape_Area' in gdf.columns:
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
gdfs.append(gdf)
# 统一坐标系
base_crs = gdfs[0].crs
for i, gdf in enumerate(gdfs[1:], 2):
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdfs[i - 1] = gdf.to_crs(base_crs)
print("正在合并图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
merged_gdf = merged_gdf.set_geometry('geometry')
if not output_vector_path.endswith('.gpkg'):
output_vector_path = output_vector_path.replace('.shp', '.gpkg')
print("输出格式已更改为GeoPackage以支持大文件")
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
print("矢量合并完成!")
print(f"合并矢量保存为: {output_vector_path}")
return output_vector_path
except Exception as e:
print(f"发生错误: {str(e)}")
return None
def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
"""
将合并后的矢量文件分块转为栅格显示7个类别背景值设为 value=8添加进度条和像素统计
参数:
input_vector_path: 输入的合并矢量文件路径.gpkg格式
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认30米
block_size: 每个分块的宽度和高度像素单位默认5000
"""
try:
# 读取合并后的矢量文件
print(f"正在读取合并矢量文件: {input_vector_path}")
merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")
# 检查 value 字段是否有效
if 'value' not in merged_gdf.columns:
raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
print("合并矢量文件中的 value 字段分布:")
print(merged_gdf['value'].value_counts(dropna=False))
if not merged_gdf['value'].between(1, 7).all():
print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]
# 检查合并后的矢量数据是否为空
if merged_gdf.empty:
raise ValueError("合并后的矢量数据为空,请检查输入数据是否有效")
# 按 value 字段排序,优先级高的类别(例如 value 较大)后处理,避免被覆盖
print("正在按 'value' 字段排序以确保优先级...")
merged_gdf = merged_gdf.sort_values(by='value', ascending=True)
# 计算总范围和栅格尺寸
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
total_width = int((bounds[2] - bounds[0]) / raster_resolution)
total_height = int((bounds[3] - bounds[1]) / raster_resolution)
if total_width <= 0 or total_height <= 0:
raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
print(f"栅格范围: minx={bounds[0]}, miny={bounds[1]}, maxx={bounds[2]}, maxy={bounds[3]}")
print(f"栅格尺寸: 宽度={total_width}, 高度={total_height}")
# 创建栅格变换
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], total_width,
total_height)
# 计算总分块数以设置进度条
total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
print(f"总分块数: {total_blocks}")
# 创建并写入栅格文件,逐块写入以减少内存占用
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=total_height,
width=total_width,
count=1,
dtype=rasterio.uint8,
crs=merged_gdf.crs,
transform=transform,
nodata=255 # 使用 255 作为 nodata 值,区分 value=8 的背景
) as dst:
# 分块处理并添加进度条
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
for y in range(0, total_height, block_size):
for x in range(0, total_width, block_size):
block_height = min(block_size, total_height - y)
block_width = min(block_size, total_width - x)
# 计算当前分块的地理范围
block_minx = bounds[0] + x * raster_resolution
block_maxy = bounds[3] - y * raster_resolution
block_maxx = block_minx + block_width * raster_resolution
block_miny = block_maxy - block_height * raster_resolution
# 创建分块边界框
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()
# 初始化分块为背景值 8
block_raster = np.full((block_height, block_width), 8, dtype=np.uint8)
if not block_gdf.empty:
# 打印分块中的 value 字段分布
print(f"分块 (x={x}, y={y}) 中的 value 字段分布:")
print(block_gdf['value'].value_counts(dropna=False))
# 分块栅格化
block_transform = rasterio.transform.from_bounds(
block_minx, block_miny, block_maxx, block_maxy, block_width, block_height
)
shapes = ((mapping(geom), value) for geom, value in
zip(block_gdf.geometry, block_gdf['value']))
block_raster = rasterize(
shapes=shapes,
out=block_raster, # 使用预初始化的数组
transform=block_transform,
fill=8, # 未覆盖区域为 8
dtype=rasterio.uint8,
all_touched=True # 确保所有触及的像素都被计入
)
# 统计分块中 value=1 到 value=7 的像素点个数
print(f"分块 (x={x}, y={y}) 中像素值统计:")
unique, counts = np.unique(block_raster, return_counts=True)
value_counts = dict(zip(unique, counts))
for val in range(1, 8): # 检查 value=1 到 value=7
count = value_counts.get(val, 0)
print(f"Value={val}: {count} 像素")
# 单独打印 value=8背景值
background_count = value_counts.get(8, 0)
print(f"Value=8 (background): {background_count} 像素")
# 直接写入当前分块到文件中
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
pbar.update(1)
# 读取整个栅格文件,统计最终像素分布
with rasterio.open(output_raster_path) as src:
final_raster = src.read(1)
print("最终栅格文件中的像素值统计:")
unique, counts = np.unique(final_raster, return_counts=True)
value_counts = dict(zip(unique, counts))
for val in range(1, 9): # 检查 value=1 到 value=8
count = value_counts.get(val, 0)
print(f"Value={val}: {count} 像素")
# 打印 nodata 值255
nodata_count = value_counts.get(255, 0)
print(f"Nodata (255): {nodata_count} 像素")
print("栅格化完成!")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland, 8=background")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
input_files = [
r"sddimaoshp/forest/sdforest.shp",
r"sddimaoshp/water/shandongsuiyu3.shp",
r"sddimaoshp/shrub/sdshrub.shp",
r"sddimaoshp/wetland/sdwetland.shp",
r"sddimaoshp/jianzhu/shandongjianzhu3.shp",
r"sddimaoshp/grass/sdgrass.shp",
r"sddimaoshp/bareland/sdbareland.shp"
]
output_vector_file = r"sddimaoshp/hebingtif/shandongdimiao_10_1m.gpkg"
output_raster_file = r"sddimaoshp/hebingtif/sddimao_10_1m.tif"
# Step 1: 合并矢量
vector_path = merge_vectors_seven_categories(input_files, output_vector_file)
# Step 2: 如果合并成功,进行分块栅格化
if vector_path:
rasterize_vector_by_blocks(vector_path, output_raster_file, raster_resolution=10, block_size=5000)

View File

@ -0,0 +1,191 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
import os
from tqdm import tqdm # 导入 tqdm 用于进度条
def merge_vectors_seven_categories(file_paths, output_vector_path):
"""
合并7个地貌类型矢量图层并保存为GeoPackage格式去掉cropland
参数:
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
output_vector_path: 输出合并后的矢量文件路径使用.gpkg格式
"""
try:
# 检查输入文件数量是否为7
if len(file_paths) != 7:
raise ValueError("请提供正好7个矢量文件路径")
for path in file_paths:
if not os.path.exists(path):
raise FileNotFoundError(f"文件不存在: {path}")
# 定义7个类别去掉cropland
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
gdfs = []
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
print(f"正在读取第{i}个矢量文件: {path}")
gdf = gpd.read_file(path, engine="pyogrio")
gdf['category'] = category
gdf['value'] = category_values[category]
if 'Shape_Area' in gdf.columns:
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
gdfs.append(gdf)
# 统一坐标系
base_crs = gdfs[0].crs
for i, gdf in enumerate(gdfs[1:], 2):
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdfs[i - 1] = gdf.to_crs(base_crs)
print("正在合并图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
merged_gdf = merged_gdf.set_geometry('geometry')
if not output_vector_path.endswith('.gpkg'):
output_vector_path = output_vector_path.replace('.shp', '.gpkg')
print("输出格式已更改为GeoPackage以支持大文件")
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
print("矢量合并完成!")
print(f"合并矢量保存为: {output_vector_path}")
return output_vector_path
except Exception as e:
print(f"发生错误: {str(e)}")
return None
def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
"""
将合并后的矢量文件分块转为栅格显示7个类别背景值设为 value=8添加进度条不清理数据
参数:
input_vector_path: 输入的合并矢量文件路径.gpkg格式
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认30米
block_size: 每个分块的宽度和高度像素单位默认5000
"""
try:
# 读取合并后的矢量文件
print(f"正在读取合并矢量文件: {input_vector_path}")
merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")
# 检查 value 字段是否有效
if 'value' not in merged_gdf.columns:
raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
if not merged_gdf['value'].between(1, 7).all():
print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]
# 按 value 字段排序,优先级高的类别(例如 value 较大)后处理,避免被覆盖
print("正在按 'value' 字段排序以确保优先级...")
merged_gdf = merged_gdf.sort_values(by='value', ascending=True)
# 计算总范围和栅格尺寸
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
total_width = int((bounds[2] - bounds[0]) / raster_resolution)
total_height = int((bounds[3] - bounds[1]) / raster_resolution)
if total_width <= 0 or total_height <= 0:
raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
# 创建栅格变换
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], total_width,
total_height)
# 计算总分块数以设置进度条
total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
print(f"总分块数: {total_blocks}")
# 创建并写入栅格文件,逐块写入以减少内存占用
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=total_height,
width=total_width,
count=1,
dtype=rasterio.uint8,
crs=merged_gdf.crs,
transform=transform,
nodata=255 # 使用 255 作为 nodata 值,区分 value=8 的背景
) as dst:
# 分块处理并添加进度条
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
for y in range(0, total_height, block_size):
for x in range(0, total_width, block_size):
block_height = min(block_size, total_height - y)
block_width = min(block_size, total_width - x)
# 计算当前分块的地理范围
block_minx = bounds[0] + x * raster_resolution
block_maxy = bounds[3] - y * raster_resolution
block_maxx = block_minx + block_width * raster_resolution
block_miny = block_maxy - block_height * raster_resolution
# 创建分块边界框
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()
# 初始化分块为背景值 8
block_raster = np.full((block_height, block_width), 8, dtype=np.uint8)
if not block_gdf.empty:
# 分块栅格化
block_transform = rasterio.transform.from_bounds(
block_minx, block_miny, block_maxx, block_miny, block_width, block_height
)
shapes = ((mapping(geom), value) for geom, value in
zip(block_gdf.geometry, block_gdf['value']))
block_raster = rasterize(
shapes=shapes,
out=block_raster, # 使用预初始化的数组
transform=block_transform,
fill=8, # 未覆盖区域为 8
dtype=rasterio.uint8,
all_touched=True # 确保所有触及的像素都被计入
)
# 直接写入当前分块到文件中
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
pbar.update(1)
print("栅格化完成!")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland, 8=background")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
input_files = [
r"sddimaoshp/forest/sdforest.shp",
r"sddimaoshp/water/sdwater.shp",
r"sddimaoshp/shrub/sdshrub.shp",
r"sddimaoshp/wetland/sdwetland.shp",
r"sddimaoshp/jianzhu/jianzhu2.shp",
r"sddimaoshp/grass/sdgrass.shp",
r"sddimaoshp/bareland/sdbareland.shp"
]
output_vector_file = r"sddimaoshp/hebingtif/shandongdimiao31.gpkg"
output_raster_file = r"sddimaoshp/hebingtif/sddimao31.tif"
# Step 1: 合并矢量
vector_path = merge_vectors_seven_categories(input_files, output_vector_file)
# Step 2: 如果合并成功,进行分块栅格化
if vector_path:
rasterize_vector_by_blocks(vector_path, output_raster_file, raster_resolution=10, block_size=5000)

View File

@ -0,0 +1,111 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping
def merge_and_rasterize_vectors_two_categories(file_paths, output_vector_path, output_raster_path,
raster_resolution=30):
"""
合并2个矢量图层forest, water转为栅格并显示2个类别保留0值作为背景
参数:
file_paths: 包含2个矢量文件路径的列表 (按顺序: forest, water)
output_vector_path: 输出合并后的矢量文件路径
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认30米
"""
try:
# 检查输入文件数量
if len(file_paths) != 2:
raise ValueError("请提供正好2个矢量文件路径")
# 定义类别映射,按照指定顺序
categories = ['forest', 'water']
category_values = {'forest': 1, 'water': 2} # forest=1, water=2
# 读取所有矢量文件并添加分类
gdfs = []
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
print(f"正在读取第{i}个矢量文件: {path}")
gdf = gpd.read_file(path)
gdf['category'] = category # 添加类别字段
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
gdfs.append(gdf)
# 检查和统一坐标系(以第一个图层为基准)
base_crs = gdfs[0].crs
for i, gdf in enumerate(gdfs[1:], 2):
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdfs[i - 1] = gdf.to_crs(base_crs)
# 合并所有GeoDataFrame
print("正在合并图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
merged_gdf = merged_gdf.set_geometry('geometry')
# 保存合并后的矢量文件
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path)
# 计算栅格化的范围
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
width = int((bounds[2] - bounds[0]) / raster_resolution)
height = int((bounds[3] - bounds[1]) / raster_resolution)
# 创建栅格化变换
transform = rasterio.transform.from_bounds(
bounds[0], bounds[1], bounds[2], bounds[3], width, height
)
# 栅格化:将几何对象转为栅格,使用'value'字段作为像素值
print("正在将矢量转为栅格...")
shapes = ((mapping(geom), value) for geom, value in zip(merged_gdf.geometry, merged_gdf['value']))
raster = rasterize(
shapes=shapes,
out_shape=(height, width),
transform=transform,
fill=0, # 背景值为0
dtype=rasterio.uint8
)
# 保存栅格文件
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=height,
width=width,
count=1,
dtype=rasterio.uint8,
crs=base_crs,
transform=transform,
nodata=0 # 设置nodata值为0表示背景
) as dst:
dst.write(raster, 1)
print("处理完成!")
print(f"合并矢量保存为: {output_vector_path}")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 0=背景, 1=forest, 2=water")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
# 输入2个矢量文件路径按顺序: forest, water
input_files = [
r"E:\Data\z18\sd\Total_sd\shandonghebingshp\sdshp\forest\sdforest.shp",
r"E:\Data\z18\sd\Total_sd\shandonghebingshp\sdshp\water\true\shandongsuiyu3.shp"
]
output_vector_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_vector_forest_water.shp"
output_raster_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_raster_forest_water.tif"
# 执行合并和栅格化
merge_and_rasterize_vectors_two_categories(input_files, output_vector_file, output_raster_file,
raster_resolution=10)

View File

@ -0,0 +1,113 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping
def merge_and_rasterize_vectors_four_categories(file_paths, output_vector_path, output_raster_path,
raster_resolution=30):
"""
合并4个矢量图层bareland, grass, shrub, building转为栅格并显示4个类别保留0值作为背景
参数:
file_paths: 包含4个矢量文件路径的列表 (按顺序: bareland, grass, shrub, building)
output_vector_path: 输出合并后的矢量文件路径
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认30米
"""
try:
# 检查输入文件数量
if len(file_paths) != 4:
raise ValueError("请提供正好4个矢量文件路径")
# 定义类别映射,按照指定顺序
categories = ['bareland', 'grass', 'shrub', 'building']
category_values = {'bareland': 1, 'grass': 2, 'shrub': 3, 'building': 4} # bareland=1, grass=2, shrub=3, building=4
# 读取所有矢量文件并添加分类
gdfs = []
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
print(f"正在读取第{i}个矢量文件: {path}")
gdf = gpd.read_file(path)
gdf['category'] = category # 添加类别字段
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
gdfs.append(gdf)
# 检查和统一坐标系(以第一个图层为基准)
base_crs = gdfs[0].crs
for i, gdf in enumerate(gdfs[1:], 2):
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdfs[i - 1] = gdf.to_crs(base_crs)
# 合并所有GeoDataFrame
print("正在合并图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
merged_gdf = merged_gdf.set_geometry('geometry')
# 保存合并后的矢量文件
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path)
# 计算栅格化的范围
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
width = int((bounds[2] - bounds[0]) / raster_resolution)
height = int((bounds[3] - bounds[1]) / raster_resolution)
# 创建栅格化变换
transform = rasterio.transform.from_bounds(
bounds[0], bounds[1], bounds[2], bounds[3], width, height
)
# 栅格化:将几何对象转为栅格,使用'value'字段作为像素值
print("正在将矢量转为栅格...")
shapes = ((mapping(geom), value) for geom, value in zip(merged_gdf.geometry, merged_gdf['value']))
raster = rasterize(
shapes=shapes,
out_shape=(height, width),
transform=transform,
fill=0, # 背景值为0
dtype=rasterio.uint8
)
# 保存栅格文件
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=height,
width=width,
count=1,
dtype=rasterio.uint8,
crs=base_crs,
transform=transform,
nodata=0 # 设置nodata值为0表示背景
) as dst:
dst.write(raster, 1)
print("处理完成!")
print(f"合并矢量保存为: {output_vector_path}")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 0=背景, 1=bareland, 2=grass, 3=shrub, 4=building")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
# 输入4个矢量文件路径按顺序: bareland, grass, shrub, building
input_files = [
r"E:\njds\shenyang\shengyangshp\bareland\bareland.shp",
r"E:\njds\shenyang\shengyangshp\grass\grass2.shp",
r"E:\njds\shenyang\shengyangshp\shrub\shrub.shp",
r"E:\Data\z18\sd\Total_sd\shandonghebingshp\sdshp\jianzhu\true\shandongjianzhu3.shp"
]
output_vector_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_vector_four.shp"
output_raster_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_raster_four.tif"
# 执行合并和栅格化
merge_and_rasterize_vectors_four_categories(input_files, output_vector_file, output_raster_file,
raster_resolution=10)

View File

@ -0,0 +1,112 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping
def merge_and_rasterize_vectors_three_categories(file_paths, output_vector_path, output_raster_path,
raster_resolution=30):
"""
合并3个矢量图层bareland, grass, shrub转为栅格并显示3个类别保留0值作为背景
参数:
file_paths: 包含3个矢量文件路径的列表 (按顺序: bareland, grass, shrub)
output_vector_path: 输出合并后的矢量文件路径
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认30米
"""
try:
# 检查输入文件数量
if len(file_paths) != 3:
raise ValueError("请提供正好3个矢量文件路径")
# 定义类别映射,按照指定顺序
categories = ['bareland', 'grass', 'shrub']
category_values = {'bareland': 1, 'grass': 2, 'shrub': 3} # bareland=1, grass=2, shrub=3
# 读取所有矢量文件并添加分类
gdfs = []
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
print(f"正在读取第{i}个矢量文件: {path}")
gdf = gpd.read_file(path)
gdf['category'] = category # 添加类别字段
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
gdfs.append(gdf)
# 检查和统一坐标系(以第一个图层为基准)
base_crs = gdfs[0].crs
for i, gdf in enumerate(gdfs[1:], 2):
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdfs[i - 1] = gdf.to_crs(base_crs)
# 合并所有GeoDataFrame
print("正在合并图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
merged_gdf = merged_gdf.set_geometry('geometry')
# 保存合并后的矢量文件
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path)
# 计算栅格化的范围
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
width = int((bounds[2] - bounds[0]) / raster_resolution)
height = int((bounds[3] - bounds[1]) / raster_resolution)
# 创建栅格化变换
transform = rasterio.transform.from_bounds(
bounds[0], bounds[1], bounds[2], bounds[3], width, height
)
# 栅格化:将几何对象转为栅格,使用'value'字段作为像素值
print("正在将矢量转为栅格...")
shapes = ((mapping(geom), value) for geom, value in zip(merged_gdf.geometry, merged_gdf['value']))
raster = rasterize(
shapes=shapes,
out_shape=(height, width),
transform=transform,
fill=0, # 背景值为0
dtype=rasterio.uint8
)
# 保存栅格文件
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=height,
width=width,
count=1,
dtype=rasterio.uint8,
crs=base_crs,
transform=transform,
nodata=0 # 设置nodata值为0表示背景
) as dst:
dst.write(raster, 1)
print("处理完成!")
print(f"合并矢量保存为: {output_vector_path}")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 0=背景, 1=bareland, 2=grass, 3=shrub")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
# 输入3个矢量文件路径按顺序: bareland, grass, shrub
input_files = [
r"E:\njds\shenyang\shengyangshp\bareland\bareland.shp",
r"E:\njds\shenyang\shengyangshp\grass\grass2.shp",
r"E:\njds\shenyang\shengyangshp\shrub\shrub.shp"
]
output_vector_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_vector_three.shp"
output_raster_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_raster_three.tif"
# 执行合并和栅格化
merge_and_rasterize_vectors_three_categories(input_files, output_vector_file, output_raster_file,
raster_resolution=30)

View File

@ -0,0 +1,190 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
import numpy as np
from shapely.geometry import mapping, box
import os
from tqdm import tqdm # 导入 tqdm 用于进度条
def merge_vectors_seven_categories(file_paths, output_vector_path):
"""
合并7个地貌类型矢量图层并保存为GeoPackage格式去掉cropland
参数:
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
output_vector_path: 输出合并后的矢量文件路径使用.gpkg格式
"""
try:
# 检查输入文件数量是否为7
if len(file_paths) != 7:
raise ValueError("请提供正好7个矢量文件路径")
for path in file_paths:
if not os.path.exists(path):
raise FileNotFoundError(f"文件不存在: {path}")
# 定义7个类别去掉cropland
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
gdfs = []
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
print(f"正在读取第{i}个矢量文件: {path}")
gdf = gpd.read_file(path, engine="pyogrio")
gdf['category'] = category
gdf['value'] = category_values[category]
if 'Shape_Area' in gdf.columns:
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
gdfs.append(gdf)
# 统一坐标系
base_crs = gdfs[0].crs
for i, gdf in enumerate(gdfs[1:], 2):
if gdf.crs != base_crs:
print(f"{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
gdfs[i - 1] = gdf.to_crs(base_crs)
print("正在合并图层...")
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
merged_gdf = merged_gdf.set_geometry('geometry')
if not output_vector_path.endswith('.gpkg'):
output_vector_path = output_vector_path.replace('.shp', '.gpkg')
print("输出格式已更改为GeoPackage以支持大文件")
print(f"正在保存合并矢量结果到: {output_vector_path}")
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
print("矢量合并完成!")
print(f"合并矢量保存为: {output_vector_path}")
return output_vector_path
except Exception as e:
print(f"发生错误: {str(e)}")
return None
def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
"""
将合并后的矢量文件分块转为栅格显示7个类别保留0值作为背景添加进度条不清理数据
参数:
input_vector_path: 输入的合并矢量文件路径.gpkg格式
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格的分辨率默认30米
block_size: 每个分块的宽度和高度像素单位默认5000
"""
try:
# 读取合并后的矢量文件
print(f"正在读取合并矢量文件: {input_vector_path}")
merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")
# 检查 value 字段是否有效
if 'value' not in merged_gdf.columns:
raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
if not merged_gdf['value'].between(1, 7).all():
print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]
# 按 value 字段排序,优先级高的类别(例如 value 较大)后处理,避免被覆盖
print("正在按 'value' 字段排序以确保优先级...")
merged_gdf = merged_gdf.sort_values(by='value', ascending=True)
# 计算总范围和栅格尺寸
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
total_width = int((bounds[2] - bounds[0]) / raster_resolution)
total_height = int((bounds[3] - bounds[1]) / raster_resolution)
if total_width <= 0 or total_height <= 0:
raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
# 创建栅格变换
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], total_width,
total_height)
# 计算总分块数以设置进度条
total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
print(f"总分块数: {total_blocks}")
# 创建并写入栅格文件,逐块写入以减少内存占用
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=total_height,
width=total_width,
count=1,
dtype=rasterio.uint8,
crs=merged_gdf.crs,
transform=transform,
nodata=0
) as dst:
# 分块处理并添加进度条
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
for y in range(0, total_height, block_size):
for x in range(0, total_width, block_size):
block_height = min(block_size, total_height - y)
block_width = min(block_size, total_width - x)
# 计算当前分块的地理范围
block_minx = bounds[0] + x * raster_resolution
block_maxy = bounds[3] - y * raster_resolution
block_maxx = block_minx + block_width * raster_resolution
block_miny = block_maxy - block_height * raster_resolution
# 创建分块边界框
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()
if block_gdf.empty:
pbar.update(1)
continue
# 分块栅格化
block_transform = rasterio.transform.from_bounds(
block_minx, block_miny, block_maxx, block_maxy, block_width, block_height
)
shapes = ((mapping(geom), value) for geom, value in zip(block_gdf.geometry, block_gdf['value']))
block_raster = rasterize(
shapes=shapes,
out_shape=(block_height, block_width),
transform=block_transform,
fill=0,
dtype=rasterio.uint8,
all_touched=True # 确保所有触及的像素都被计入,避免小对象丢失
)
# 直接写入当前分块到文件中
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
pbar.update(1)
print("栅格化完成!")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 0=背景, 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
input_files = [
r"sddimaoshp/forest/sdforest.shp",
r"sddimaoshp/water/sdwater.shp",
r"sddimaoshp/shrub/sdshrub.shp",
r"sddimaoshp/wetland/sdwetland.shp",
r"sddimaoshp/jianzhu/jianzhu2.shp",
r"sddimaoshp/grass/sdgrass.shp",
r"sddimaoshp/bareland/sdbareland.shp"
]
output_vector_file = r"sddimaoshp/hebingtif/shandongdimiao_30m.gpkg"
output_raster_file = r"sddimaoshp/hebingtif/sddimao_30m.tif"
# Step 1: 合并矢量
vector_path = merge_vectors_seven_categories(input_files, output_vector_file)
# Step 2: 如果合并成功,进行分块栅格化
if vector_path:
rasterize_vector_by_blocks(vector_path, output_raster_file, raster_resolution=30, block_size=5000)

View File

@ -0,0 +1,153 @@
import numpy as np
import sys
import os
# 尝试多种方式导入GDAL
try:
# 尝试直接导入GDAL
import gdal
import ogr
import osr
print("成功导入GDAL库")
except ImportError:
try:
# 尝试从osgeo导入
from osgeo import gdal, ogr, osr
print("从osgeo成功导入GDAL库")
except ImportError:
print("无法导入GDAL库请确保已安装GDAL及其Python绑定")
sys.exit(1)
def process_rasters(shp_path, raster_path, output_path):
"""
使用shp文件裁剪栅格保留不重叠部分设为value=8然后与原栅格合并
参数:
shp_path: shapefile的路径
raster_path: 原始栅格文件路径 (value 1-7)
output_path: 输出栅格文件路径
"""
try:
# 设置GDAL错误处理
gdal.UseExceptions()
# 打开栅格文件
raster = gdal.Open(raster_path)
if raster is None:
print(f"无法打开栅格文件: {raster_path}")
return None
# 获取栅格信息
print(f"栅格大小: {raster.RasterXSize} x {raster.RasterYSize}, {raster.RasterCount} 波段")
geo_transform = raster.GetGeoTransform()
projection = raster.GetProjection()
print(f"栅格投影: {projection}")
# 读取栅格数据
band = raster.GetRasterBand(1)
raster_data = band.ReadAsArray()
nodata = band.GetNoDataValue()
if nodata is None:
nodata = 0
# 打开shapefile
driver = ogr.GetDriverByName("ESRI Shapefile")
try:
vector = driver.Open(shp_path, 0) # 0表示只读模式
except Exception as e:
print(f"无法打开shapefile: {e}")
return None
if vector is None:
print(f"无法打开shapefile: {shp_path}")
return None
# 获取shapefile信息
layer = vector.GetLayer()
feature_count = layer.GetFeatureCount()
print(f"Shapefile有 {feature_count} 个要素")
# 创建栅格掩码
# 使用GDAL内置的栅格化功能将shapefile转换为栅格
print("将shapefile栅格化...")
memory_driver = gdal.GetDriverByName('MEM')
mask_ds = memory_driver.Create('', raster.RasterXSize, raster.RasterYSize, 1, gdal.GDT_Byte)
mask_ds.SetGeoTransform(geo_transform)
mask_ds.SetProjection(projection)
# 栅格化
mask_band = mask_ds.GetRasterBand(1)
mask_band.Fill(0) # 初始化为0
# 将shapefile栅格化到掩码上
gdal.RasterizeLayer(mask_ds, [1], layer, burn_values=[1])
# 读取掩码数据
mask_data = mask_band.ReadAsArray()
# 栅格有值区域的掩码 (value 1-7)
valid_data_mask = (raster_data >= 1) & (raster_data <= 7)
# 获取被shapefile覆盖但不与原始有效栅格区域重叠的部分
shp_mask = mask_data > 0 # shapefile 覆盖区域
non_overlapping_mask = shp_mask & (~valid_data_mask) # shapefile 覆盖但不与原始栅格重叠区域
# 合并两个栅格
merged_raster = raster_data.copy()
merged_raster[non_overlapping_mask] = 8
# 创建输出栅格
driver = gdal.GetDriverByName('GTiff')
out_ds = driver.Create(output_path, raster.RasterXSize, raster.RasterYSize, 1, band.DataType)
out_ds.SetGeoTransform(geo_transform)
out_ds.SetProjection(projection)
# 写入数据
out_band = out_ds.GetRasterBand(1)
out_band.WriteArray(merged_raster)
out_band.SetNoDataValue(nodata)
# 关闭数据集
out_ds = None
mask_ds = None
raster = None
vector = None
print(f"处理完成,结果保存至: {output_path}")
# 返回处理后的统计信息
stats = {
"原始栅格有效区域(value 1-7)像素数": int(np.sum(valid_data_mask)),
"shapefile覆盖区域像素数": int(np.sum(shp_mask)),
"不重叠区域(value=8)像素数": int(np.sum(non_overlapping_mask)),
"结果栅格总有效像素数": int(np.sum(merged_raster > 0))
}
return stats
except Exception as e:
print(f"发生错误: {e}")
import traceback
traceback.print_exc()
return None
# 使用示例
if __name__ == "__main__":
# 输入文件路径
shp_path = r"E:\Data\z18\shandongshp\sd.shp"
raster_path = r"E:\Data\z18\sd\sd_y10m\sddimao_10m.tif"
output_path = r"E:\Data\z18\sd\sd_10m\sddimao_10mmerged.tif"
# 执行处理
stats = process_rasters(shp_path, raster_path, output_path)
# 打印统计信息
if stats:
for key, value in stats.items():
print(f"{key}: {value}")
else:
print("处理失败,无统计信息可显示")

View File

@ -0,0 +1,125 @@
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
from rasterio.windows import Window
import numpy as np
from shapely.geometry import mapping
import os
def large_vector_to_raster(input_shp_path, output_raster_path, raster_resolution=30, chunk_size=1000, window_size=1000, category_field='value', category_value=1):
"""
将大型面要素转为栅格数据分块读取矢量并按窗口写入栅格适用于大文件例如2GB
参数:
input_shp_path: 输入shapefile路径
output_raster_path: 输出栅格文件路径
raster_resolution: 输出栅格分辨率单位默认30
chunk_size: 每次读取的矢量特征数量默认1000
window_size: 每个栅格窗口的像素大小默认1000x1000
category_field: 矢量数据中用于栅格化的字段名默认'value'
category_value: 默认类别值若无字段则所有特征赋此值默认1
"""
try:
# 检查输入文件是否存在
if not os.path.exists(input_shp_path):
raise FileNotFoundError(f"输入文件 {input_shp_path} 不存在")
# 读取矢量文件的元信息以确定范围和CRS
print("正在读取矢量元信息...")
gdf_meta = gpd.read_file(input_shp_path, rows=1) # 只读一行获取元数据
base_crs = gdf_meta.crs
bounds = gdf_meta.total_bounds # [minx, miny, maxx, maxy]
# 计算栅格尺寸
width = int((bounds[2] - bounds[0]) / raster_resolution)
height = int((bounds[3] - bounds[1]) / raster_resolution)
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], width, height)
# 初始化输出栅格文件
print(f"初始化输出栅格文件: {output_raster_path}")
with rasterio.open(
output_raster_path,
'w',
driver='GTiff',
height=height,
width=width,
count=1,
dtype=rasterio.uint8,
crs=base_crs,
transform=transform,
nodata=0 # 背景值为0
) as dst:
# 分窗口处理
for row_offset in range(0, height, window_size):
for col_offset in range(0, width, window_size):
window_height = min(window_size, height - row_offset)
window_width = min(window_size, width - col_offset)
window = Window(col_offset, row_offset, window_width, window_height)
# 计算当前窗口的地理范围
window_transform = rasterio.windows.transform(window, transform)
window_bounds = rasterio.windows.bounds(window, transform)
# 创建窗口的边界几何,用于筛选矢量数据
window_poly = gpd.GeoSeries.from_wkt([f"POLYGON(({window_bounds[0]} {window_bounds[1]}, "
f"{window_bounds[2]} {window_bounds[1]}, "
f"{window_bounds[2]} {window_bounds[3]}, "
f"{window_bounds[0]} {window_bounds[3]}, "
f"{window_bounds[0]} {window_bounds[1]}))"]).iloc[0]
# 分块读取矢量数据并筛选当前窗口内的特征
print(f"处理窗口: row={row_offset}, col={col_offset}")
chunk_raster = np.zeros((window_height, window_width), dtype=rasterio.uint8)
chunk_iterator = gpd.read_file(input_shp_path, chunksize=chunk_size)
for chunk_idx, chunk_gdf in enumerate(chunk_iterator):
# 筛选与当前窗口相交的特征
window_gdf = chunk_gdf[chunk_gdf.intersects(window_poly)]
if len(window_gdf) == 0:
continue
print(f" - 处理分块 {chunk_idx},筛选出 {len(window_gdf)} 个特征")
# 如果没有指定category_field则赋默认值
if category_field not in window_gdf.columns:
window_gdf['value'] = category_value
# 栅格化当前分块
shapes = ((mapping(geom), value) for geom, value in zip(window_gdf.geometry, window_gdf[category_field]))
temp_raster = rasterize(
shapes=shapes,
out_shape=(window_height, window_width),
transform=window_transform,
fill=0,
dtype=rasterio.uint8
)
# 更新窗口栅格保留非0值
mask = temp_raster > 0
chunk_raster[mask] = temp_raster[mask]
# 写入当前窗口到栅格文件
if np.any(chunk_raster > 0):
dst.write(chunk_raster, 1, window=window)
print("处理完成!")
print(f"栅格保存为: {output_raster_path}")
print(f"栅格中类别值: 0=背景, 其他值由 {category_field} 字段或默认值 {category_value} 确定")
except Exception as e:
print(f"发生错误: {str(e)}")
# 使用示例
if __name__ == "__main__":
input_shp = r"E:\large_data\large_vector.shp" # 替换为您的2GB矢量文件路径
output_raster = r"E:\large_data\large_raster.tif" # 输出栅格路径
large_vector_to_raster(
input_shp_path=input_shp,
output_raster_path=output_raster,
raster_resolution=30, # 分辨率30米
chunk_size=1000, # 每次读取1000个特征
window_size=1000, # 每个窗口1000x1000像素
category_field='value', # 假设矢量数据中有'value'字段,若无则用默认值
category_value=1 # 默认类别值
)