This commit is contained in:
parent
0e9d2ef236
commit
adca4f89af
|
@ -0,0 +1,189 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping, box
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
|
||||
def merge_and_rasterize_vectors_seven_categories(file_paths, output_vector_path, output_raster_path,
|
||||
raster_resolution=10, block_size=2000, chunk_size=10000):
|
||||
"""
|
||||
合并7个地貌类型矢量图层,转为栅格并显示7个类别,保留0值作为背景,优化合并策略(去掉cropland)
|
||||
|
||||
参数:
|
||||
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
|
||||
output_vector_path: 输出合并后的矢量文件路径(建议使用.gpkg格式)
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认10米)
|
||||
block_size: 每个分块的宽度和高度(像素单位,默认2000)
|
||||
chunk_size: 每次处理矢量数据的块大小(默认10000条记录)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量
|
||||
if len(file_paths) != 7:
|
||||
raise ValueError("请提供正好7个矢量文件路径")
|
||||
|
||||
# 定义类别映射,按照指定顺序(去掉cropland)
|
||||
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
|
||||
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
|
||||
|
||||
# 逐块读取矢量文件,合并到内存中
|
||||
gdfs = []
|
||||
base_crs = None
|
||||
for i, (path, category) in enumerate(tqdm(zip(file_paths, categories), total=7, desc="读取矢量文件进度"), 1):
|
||||
print(f"正在处理第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path, engine="pyogrio")
|
||||
gdf['category'] = category # 添加类别字段
|
||||
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
|
||||
gdf['value'] = gdf['value'].astype(int) # 确保 value 为整数类型
|
||||
if 'Shape_Area' in gdf.columns:
|
||||
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9) # 限制最大值,避免溢出
|
||||
|
||||
# 统一坐标系(以第一个图层为基准)
|
||||
if i == 1:
|
||||
base_crs = gdf.crs
|
||||
else:
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdf = gdf.to_crs(base_crs)
|
||||
|
||||
print(f"第{i}个图层 value 字段分布:", gdf['value'].value_counts(dropna=False))
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 在内存中合并所有 gdf
|
||||
print("正在合并所有图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=base_crs)
|
||||
print("合并后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
|
||||
|
||||
# 保存合并后的矢量文件
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
|
||||
|
||||
# 重新加载合并后的文件,检查 value 字段
|
||||
print("正在重新加载合并后的文件...")
|
||||
merged_gdf = gpd.read_file(output_vector_path, engine="pyogrio")
|
||||
print("重新加载后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
|
||||
|
||||
# 数据清洗:处理 value 字段
|
||||
print("正在清洗 'value' 字段数据...")
|
||||
# 处理 nan 值
|
||||
if merged_gdf['value'].isna().any():
|
||||
print("警告: 'value' 字段包含 nan 值,将替换为 0")
|
||||
merged_gdf['value'] = merged_gdf['value'].fillna(0)
|
||||
# 转换为整数类型
|
||||
merged_gdf['value'] = merged_gdf['value'].astype(float).astype(int)
|
||||
# 确保值在 0-7 范围内
|
||||
valid_values = set(range(0, 8)) # 0=背景, 1=forest, ..., 7=bareland
|
||||
if not merged_gdf['value'].isin(valid_values).all():
|
||||
print(f"警告: 'value' 字段包含无效值: {set(merged_gdf['value'].unique()) - valid_values},将替换为 0")
|
||||
merged_gdf['value'] = merged_gdf['value'].apply(lambda x: x if x in valid_values else 0)
|
||||
|
||||
print("清洗后 value 字段分布:", merged_gdf['value'].value_counts(dropna=False))
|
||||
|
||||
# 构建空间索引以加速后续栅格化
|
||||
print("正在构建空间索引以加速栅格化...")
|
||||
sindex = merged_gdf.sindex
|
||||
|
||||
# 计算栅格化的范围
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
# 检查栅格尺寸是否有效
|
||||
if width <= 0 or height <= 0:
|
||||
raise ValueError(f"栅格尺寸无效: 宽度={width}, 高度={height},检查范围或分辨率")
|
||||
|
||||
# 创建栅格化变换
|
||||
transform = rasterio.transform.from_bounds(
|
||||
bounds[0], bounds[1], bounds[2], bounds[3], width, height
|
||||
)
|
||||
|
||||
# 计算总分块数以设置进度条
|
||||
total_blocks = ((height + block_size - 1) // block_size) * ((width + block_size - 1) // block_size)
|
||||
print(f"总分块数: {total_blocks}")
|
||||
|
||||
# 创建并写入栅格文件(逐块写入,避免一次性占用大量内存)
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=height,
|
||||
width=width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=base_crs,
|
||||
transform=transform,
|
||||
nodata=0
|
||||
) as dst:
|
||||
# 分块处理并添加进度条
|
||||
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
|
||||
for y in range(0, height, block_size):
|
||||
for x in range(0, width, block_size):
|
||||
block_height = min(block_size, height - y)
|
||||
block_width = min(block_size, width - x)
|
||||
|
||||
# 计算当前分块的地理范围
|
||||
block_minx = bounds[0] + x * raster_resolution
|
||||
block_maxy = bounds[3] - y * raster_resolution
|
||||
block_maxx = block_minx + block_width * raster_resolution
|
||||
block_miny = block_maxy - block_height * raster_resolution
|
||||
|
||||
# 创建分块边界框并使用空间索引查询
|
||||
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
|
||||
indices = sindex.intersection(block_bounds.bounds)
|
||||
block_gdf = merged_gdf.iloc[list(indices)].copy()
|
||||
|
||||
if block_gdf.empty:
|
||||
pbar.update(1)
|
||||
continue
|
||||
|
||||
# 打印分块中的 value 分布
|
||||
print(f"分块 (x={x}, y={y}) value 字段分布:", block_gdf['value'].value_counts(dropna=False))
|
||||
|
||||
# 分块栅格化
|
||||
block_transform = rasterio.transform.from_bounds(
|
||||
block_minx, block_miny, block_maxx, block_maxy, block_width, block_height
|
||||
)
|
||||
shapes = [(mapping(geom), value) for geom, value in zip(block_gdf.geometry, block_gdf['value'])]
|
||||
block_raster = rasterize(
|
||||
shapes=shapes,
|
||||
out_shape=(block_height, block_width),
|
||||
transform=block_transform,
|
||||
fill=0,
|
||||
dtype=rasterio.uint8
|
||||
)
|
||||
|
||||
# 直接写入当前分块到文件中
|
||||
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
|
||||
pbar.update(1)
|
||||
|
||||
print("处理完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(
|
||||
f"栅格中类别值: 0=背景, 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
# 输入7个矢量文件路径(按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland,去掉cropland)
|
||||
input_files = [
|
||||
r"weifang/weifang/forest/forest31.shp",
|
||||
r"weifang/weifang/water/water1.shp",
|
||||
r"weifang/weifang/shrub/shrub.shp",
|
||||
r"weifang/weifang/wetland/wetland.shp",
|
||||
r"weifang/weifang/jianzhu/jianzhu1.shp",
|
||||
r"weifang/weifang/grass/grass.shp",
|
||||
r"weifang/weifang/bareland/bareland.shp"
|
||||
]
|
||||
output_vector_file = r"weifang/weifang/weifanghebing/weifangdimiao.gpkg" # 改为.gpkg
|
||||
output_raster_file = r"weifang/weifang/weifanghebing/weifangdimao.tif"
|
||||
|
||||
# 执行合并和栅格化
|
||||
merge_and_rasterize_vectors_seven_categories(input_files, output_vector_file, output_raster_file,
|
||||
raster_resolution=10, block_size=2000, chunk_size=10000)
|
|
@ -0,0 +1,229 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping, box
|
||||
import os
|
||||
from tqdm import tqdm # 导入 tqdm 用于进度条
|
||||
|
||||
|
||||
def merge_vectors_seven_categories(file_paths, output_vector_path):
|
||||
"""
|
||||
合并7个地貌类型矢量图层并保存为GeoPackage格式(去掉cropland)
|
||||
|
||||
参数:
|
||||
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
|
||||
output_vector_path: 输出合并后的矢量文件路径(使用.gpkg格式)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量是否为7
|
||||
if len(file_paths) != 7:
|
||||
raise ValueError("请提供正好7个矢量文件路径")
|
||||
|
||||
for path in file_paths:
|
||||
if not os.path.exists(path):
|
||||
raise FileNotFoundError(f"文件不存在: {path}")
|
||||
|
||||
# 定义7个类别(去掉cropland)
|
||||
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
|
||||
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
|
||||
|
||||
gdfs = []
|
||||
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
|
||||
print(f"正在读取第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path, engine="pyogrio")
|
||||
gdf['category'] = category
|
||||
gdf['value'] = category_values[category]
|
||||
if 'Shape_Area' in gdf.columns:
|
||||
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 统一坐标系
|
||||
base_crs = gdfs[0].crs
|
||||
for i, gdf in enumerate(gdfs[1:], 2):
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdfs[i - 1] = gdf.to_crs(base_crs)
|
||||
|
||||
print("正在合并图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
|
||||
merged_gdf = merged_gdf.set_geometry('geometry')
|
||||
|
||||
if not output_vector_path.endswith('.gpkg'):
|
||||
output_vector_path = output_vector_path.replace('.shp', '.gpkg')
|
||||
print("输出格式已更改为GeoPackage以支持大文件")
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
|
||||
|
||||
print("矢量合并完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
return output_vector_path
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
|
||||
"""
|
||||
将合并后的矢量文件分块转为栅格,显示7个类别,背景值设为 value=8,添加进度条和像素统计
|
||||
|
||||
参数:
|
||||
input_vector_path: 输入的合并矢量文件路径(.gpkg格式)
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认30米)
|
||||
block_size: 每个分块的宽度和高度(像素单位,默认5000)
|
||||
"""
|
||||
try:
|
||||
# 读取合并后的矢量文件
|
||||
print(f"正在读取合并矢量文件: {input_vector_path}")
|
||||
merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")
|
||||
|
||||
# 检查 value 字段是否有效
|
||||
if 'value' not in merged_gdf.columns:
|
||||
raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
|
||||
print("合并矢量文件中的 value 字段分布:")
|
||||
print(merged_gdf['value'].value_counts(dropna=False))
|
||||
|
||||
if not merged_gdf['value'].between(1, 7).all():
|
||||
print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
|
||||
merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]
|
||||
|
||||
# 检查合并后的矢量数据是否为空
|
||||
if merged_gdf.empty:
|
||||
raise ValueError("合并后的矢量数据为空,请检查输入数据是否有效")
|
||||
|
||||
# 按 value 字段排序,优先级高的类别(例如 value 较大)后处理,避免被覆盖
|
||||
print("正在按 'value' 字段排序以确保优先级...")
|
||||
merged_gdf = merged_gdf.sort_values(by='value', ascending=True)
|
||||
|
||||
# 计算总范围和栅格尺寸
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
total_width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
total_height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
if total_width <= 0 or total_height <= 0:
|
||||
raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
|
||||
|
||||
print(f"栅格范围: minx={bounds[0]}, miny={bounds[1]}, maxx={bounds[2]}, maxy={bounds[3]}")
|
||||
print(f"栅格尺寸: 宽度={total_width}, 高度={total_height}")
|
||||
|
||||
# 创建栅格变换
|
||||
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], total_width,
|
||||
total_height)
|
||||
|
||||
# 计算总分块数以设置进度条
|
||||
total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
|
||||
print(f"总分块数: {total_blocks}")
|
||||
|
||||
# 创建并写入栅格文件,逐块写入以减少内存占用
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=total_height,
|
||||
width=total_width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=merged_gdf.crs,
|
||||
transform=transform,
|
||||
nodata=255 # 使用 255 作为 nodata 值,区分 value=8 的背景
|
||||
) as dst:
|
||||
# 分块处理并添加进度条
|
||||
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
|
||||
for y in range(0, total_height, block_size):
|
||||
for x in range(0, total_width, block_size):
|
||||
block_height = min(block_size, total_height - y)
|
||||
block_width = min(block_size, total_width - x)
|
||||
|
||||
# 计算当前分块的地理范围
|
||||
block_minx = bounds[0] + x * raster_resolution
|
||||
block_maxy = bounds[3] - y * raster_resolution
|
||||
block_maxx = block_minx + block_width * raster_resolution
|
||||
block_miny = block_maxy - block_height * raster_resolution
|
||||
|
||||
# 创建分块边界框
|
||||
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
|
||||
block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()
|
||||
|
||||
# 初始化分块为背景值 8
|
||||
block_raster = np.full((block_height, block_width), 8, dtype=np.uint8)
|
||||
|
||||
if not block_gdf.empty:
|
||||
# 打印分块中的 value 字段分布
|
||||
print(f"分块 (x={x}, y={y}) 中的 value 字段分布:")
|
||||
print(block_gdf['value'].value_counts(dropna=False))
|
||||
|
||||
# 分块栅格化
|
||||
block_transform = rasterio.transform.from_bounds(
|
||||
block_minx, block_miny, block_maxx, block_maxy, block_width, block_height
|
||||
)
|
||||
shapes = ((mapping(geom), value) for geom, value in
|
||||
zip(block_gdf.geometry, block_gdf['value']))
|
||||
block_raster = rasterize(
|
||||
shapes=shapes,
|
||||
out=block_raster, # 使用预初始化的数组
|
||||
transform=block_transform,
|
||||
fill=8, # 未覆盖区域为 8
|
||||
dtype=rasterio.uint8,
|
||||
all_touched=True # 确保所有触及的像素都被计入
|
||||
)
|
||||
|
||||
# 统计分块中 value=1 到 value=7 的像素点个数
|
||||
print(f"分块 (x={x}, y={y}) 中像素值统计:")
|
||||
unique, counts = np.unique(block_raster, return_counts=True)
|
||||
value_counts = dict(zip(unique, counts))
|
||||
for val in range(1, 8): # 检查 value=1 到 value=7
|
||||
count = value_counts.get(val, 0)
|
||||
print(f"Value={val}: {count} 像素")
|
||||
# 单独打印 value=8(背景值)
|
||||
background_count = value_counts.get(8, 0)
|
||||
print(f"Value=8 (background): {background_count} 像素")
|
||||
|
||||
# 直接写入当前分块到文件中
|
||||
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
|
||||
print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
|
||||
pbar.update(1)
|
||||
|
||||
# 读取整个栅格文件,统计最终像素分布
|
||||
with rasterio.open(output_raster_path) as src:
|
||||
final_raster = src.read(1)
|
||||
print("最终栅格文件中的像素值统计:")
|
||||
unique, counts = np.unique(final_raster, return_counts=True)
|
||||
value_counts = dict(zip(unique, counts))
|
||||
for val in range(1, 9): # 检查 value=1 到 value=8
|
||||
count = value_counts.get(val, 0)
|
||||
print(f"Value={val}: {count} 像素")
|
||||
# 打印 nodata 值(255)
|
||||
nodata_count = value_counts.get(255, 0)
|
||||
print(f"Nodata (255): {nodata_count} 像素")
|
||||
|
||||
print("栅格化完成!")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland, 8=background")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
input_files = [
|
||||
r"sddimaoshp/forest/sdforest.shp",
|
||||
r"sddimaoshp/water/shandongsuiyu3.shp",
|
||||
r"sddimaoshp/shrub/sdshrub.shp",
|
||||
r"sddimaoshp/wetland/sdwetland.shp",
|
||||
r"sddimaoshp/jianzhu/shandongjianzhu3.shp",
|
||||
r"sddimaoshp/grass/sdgrass.shp",
|
||||
r"sddimaoshp/bareland/sdbareland.shp"
|
||||
]
|
||||
output_vector_file = r"sddimaoshp/hebingtif/shandongdimiao_10_1m.gpkg"
|
||||
output_raster_file = r"sddimaoshp/hebingtif/sddimao_10_1m.tif"
|
||||
|
||||
# Step 1: 合并矢量
|
||||
vector_path = merge_vectors_seven_categories(input_files, output_vector_file)
|
||||
|
||||
# Step 2: 如果合并成功,进行分块栅格化
|
||||
if vector_path:
|
||||
rasterize_vector_by_blocks(vector_path, output_raster_file, raster_resolution=10, block_size=5000)
|
|
@ -0,0 +1,191 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping, box
|
||||
import os
|
||||
from tqdm import tqdm # 导入 tqdm 用于进度条
|
||||
|
||||
|
||||
def merge_vectors_seven_categories(file_paths, output_vector_path):
|
||||
"""
|
||||
合并7个地貌类型矢量图层并保存为GeoPackage格式(去掉cropland)
|
||||
|
||||
参数:
|
||||
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
|
||||
output_vector_path: 输出合并后的矢量文件路径(使用.gpkg格式)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量是否为7
|
||||
if len(file_paths) != 7:
|
||||
raise ValueError("请提供正好7个矢量文件路径")
|
||||
|
||||
for path in file_paths:
|
||||
if not os.path.exists(path):
|
||||
raise FileNotFoundError(f"文件不存在: {path}")
|
||||
|
||||
# 定义7个类别(去掉cropland)
|
||||
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
|
||||
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
|
||||
|
||||
gdfs = []
|
||||
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
|
||||
print(f"正在读取第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path, engine="pyogrio")
|
||||
gdf['category'] = category
|
||||
gdf['value'] = category_values[category]
|
||||
if 'Shape_Area' in gdf.columns:
|
||||
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 统一坐标系
|
||||
base_crs = gdfs[0].crs
|
||||
for i, gdf in enumerate(gdfs[1:], 2):
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdfs[i - 1] = gdf.to_crs(base_crs)
|
||||
|
||||
print("正在合并图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
|
||||
merged_gdf = merged_gdf.set_geometry('geometry')
|
||||
|
||||
if not output_vector_path.endswith('.gpkg'):
|
||||
output_vector_path = output_vector_path.replace('.shp', '.gpkg')
|
||||
print("输出格式已更改为GeoPackage以支持大文件")
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
|
||||
|
||||
print("矢量合并完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
return output_vector_path
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
|
||||
"""
|
||||
将合并后的矢量文件分块转为栅格,显示7个类别,背景值设为 value=8,添加进度条,不清理数据
|
||||
|
||||
参数:
|
||||
input_vector_path: 输入的合并矢量文件路径(.gpkg格式)
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认30米)
|
||||
block_size: 每个分块的宽度和高度(像素单位,默认5000)
|
||||
"""
|
||||
try:
|
||||
# 读取合并后的矢量文件
|
||||
print(f"正在读取合并矢量文件: {input_vector_path}")
|
||||
merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")
|
||||
|
||||
# 检查 value 字段是否有效
|
||||
if 'value' not in merged_gdf.columns:
|
||||
raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
|
||||
if not merged_gdf['value'].between(1, 7).all():
|
||||
print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
|
||||
merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]
|
||||
|
||||
# 按 value 字段排序,优先级高的类别(例如 value 较大)后处理,避免被覆盖
|
||||
print("正在按 'value' 字段排序以确保优先级...")
|
||||
merged_gdf = merged_gdf.sort_values(by='value', ascending=True)
|
||||
|
||||
# 计算总范围和栅格尺寸
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
total_width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
total_height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
if total_width <= 0 or total_height <= 0:
|
||||
raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
|
||||
|
||||
# 创建栅格变换
|
||||
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], total_width,
|
||||
total_height)
|
||||
|
||||
# 计算总分块数以设置进度条
|
||||
total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
|
||||
print(f"总分块数: {total_blocks}")
|
||||
|
||||
# 创建并写入栅格文件,逐块写入以减少内存占用
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=total_height,
|
||||
width=total_width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=merged_gdf.crs,
|
||||
transform=transform,
|
||||
nodata=255 # 使用 255 作为 nodata 值,区分 value=8 的背景
|
||||
) as dst:
|
||||
# 分块处理并添加进度条
|
||||
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
|
||||
for y in range(0, total_height, block_size):
|
||||
for x in range(0, total_width, block_size):
|
||||
block_height = min(block_size, total_height - y)
|
||||
block_width = min(block_size, total_width - x)
|
||||
|
||||
# 计算当前分块的地理范围
|
||||
block_minx = bounds[0] + x * raster_resolution
|
||||
block_maxy = bounds[3] - y * raster_resolution
|
||||
block_maxx = block_minx + block_width * raster_resolution
|
||||
block_miny = block_maxy - block_height * raster_resolution
|
||||
|
||||
# 创建分块边界框
|
||||
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
|
||||
block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()
|
||||
|
||||
# 初始化分块为背景值 8
|
||||
block_raster = np.full((block_height, block_width), 8, dtype=np.uint8)
|
||||
|
||||
if not block_gdf.empty:
|
||||
# 分块栅格化
|
||||
block_transform = rasterio.transform.from_bounds(
|
||||
block_minx, block_miny, block_maxx, block_miny, block_width, block_height
|
||||
)
|
||||
shapes = ((mapping(geom), value) for geom, value in
|
||||
zip(block_gdf.geometry, block_gdf['value']))
|
||||
block_raster = rasterize(
|
||||
shapes=shapes,
|
||||
out=block_raster, # 使用预初始化的数组
|
||||
transform=block_transform,
|
||||
fill=8, # 未覆盖区域为 8
|
||||
dtype=rasterio.uint8,
|
||||
all_touched=True # 确保所有触及的像素都被计入
|
||||
)
|
||||
|
||||
# 直接写入当前分块到文件中
|
||||
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
|
||||
print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
|
||||
pbar.update(1)
|
||||
|
||||
print("栅格化完成!")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland, 8=background")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
input_files = [
|
||||
r"sddimaoshp/forest/sdforest.shp",
|
||||
r"sddimaoshp/water/sdwater.shp",
|
||||
r"sddimaoshp/shrub/sdshrub.shp",
|
||||
r"sddimaoshp/wetland/sdwetland.shp",
|
||||
r"sddimaoshp/jianzhu/jianzhu2.shp",
|
||||
r"sddimaoshp/grass/sdgrass.shp",
|
||||
r"sddimaoshp/bareland/sdbareland.shp"
|
||||
]
|
||||
output_vector_file = r"sddimaoshp/hebingtif/shandongdimiao31.gpkg"
|
||||
output_raster_file = r"sddimaoshp/hebingtif/sddimao31.tif"
|
||||
|
||||
# Step 1: 合并矢量
|
||||
vector_path = merge_vectors_seven_categories(input_files, output_vector_file)
|
||||
|
||||
# Step 2: 如果合并成功,进行分块栅格化
|
||||
if vector_path:
|
||||
rasterize_vector_by_blocks(vector_path, output_raster_file, raster_resolution=10, block_size=5000)
|
|
@ -0,0 +1,111 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping
|
||||
|
||||
|
||||
def merge_and_rasterize_vectors_two_categories(file_paths, output_vector_path, output_raster_path,
|
||||
raster_resolution=30):
|
||||
"""
|
||||
合并2个矢量图层(forest, water),转为栅格并显示2个类别,保留0值作为背景
|
||||
|
||||
参数:
|
||||
file_paths: 包含2个矢量文件路径的列表 (按顺序: forest, water)
|
||||
output_vector_path: 输出合并后的矢量文件路径
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认30米)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量
|
||||
if len(file_paths) != 2:
|
||||
raise ValueError("请提供正好2个矢量文件路径")
|
||||
|
||||
# 定义类别映射,按照指定顺序
|
||||
categories = ['forest', 'water']
|
||||
category_values = {'forest': 1, 'water': 2} # forest=1, water=2
|
||||
|
||||
# 读取所有矢量文件并添加分类
|
||||
gdfs = []
|
||||
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
|
||||
print(f"正在读取第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path)
|
||||
gdf['category'] = category # 添加类别字段
|
||||
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 检查和统一坐标系(以第一个图层为基准)
|
||||
base_crs = gdfs[0].crs
|
||||
for i, gdf in enumerate(gdfs[1:], 2):
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdfs[i - 1] = gdf.to_crs(base_crs)
|
||||
|
||||
# 合并所有GeoDataFrame
|
||||
print("正在合并图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
|
||||
merged_gdf = merged_gdf.set_geometry('geometry')
|
||||
|
||||
# 保存合并后的矢量文件
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path)
|
||||
|
||||
# 计算栅格化的范围
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
# 创建栅格化变换
|
||||
transform = rasterio.transform.from_bounds(
|
||||
bounds[0], bounds[1], bounds[2], bounds[3], width, height
|
||||
)
|
||||
|
||||
# 栅格化:将几何对象转为栅格,使用'value'字段作为像素值
|
||||
print("正在将矢量转为栅格...")
|
||||
shapes = ((mapping(geom), value) for geom, value in zip(merged_gdf.geometry, merged_gdf['value']))
|
||||
raster = rasterize(
|
||||
shapes=shapes,
|
||||
out_shape=(height, width),
|
||||
transform=transform,
|
||||
fill=0, # 背景值为0
|
||||
dtype=rasterio.uint8
|
||||
)
|
||||
|
||||
# 保存栅格文件
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=height,
|
||||
width=width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=base_crs,
|
||||
transform=transform,
|
||||
nodata=0 # 设置nodata值为0,表示背景
|
||||
) as dst:
|
||||
dst.write(raster, 1)
|
||||
|
||||
print("处理完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 0=背景, 1=forest, 2=water")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
# 输入2个矢量文件路径(按顺序: forest, water)
|
||||
input_files = [
|
||||
r"E:\Data\z18\sd\Total_sd\shandonghebingshp\sdshp\forest\sdforest.shp",
|
||||
r"E:\Data\z18\sd\Total_sd\shandonghebingshp\sdshp\water\true\shandongsuiyu3.shp"
|
||||
]
|
||||
output_vector_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_vector_forest_water.shp"
|
||||
output_raster_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_raster_forest_water.tif"
|
||||
|
||||
# 执行合并和栅格化
|
||||
merge_and_rasterize_vectors_two_categories(input_files, output_vector_file, output_raster_file,
|
||||
raster_resolution=10)
|
|
@ -0,0 +1,113 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping
|
||||
|
||||
|
||||
def merge_and_rasterize_vectors_four_categories(file_paths, output_vector_path, output_raster_path,
|
||||
raster_resolution=30):
|
||||
"""
|
||||
合并4个矢量图层(bareland, grass, shrub, building),转为栅格并显示4个类别,保留0值作为背景
|
||||
|
||||
参数:
|
||||
file_paths: 包含4个矢量文件路径的列表 (按顺序: bareland, grass, shrub, building)
|
||||
output_vector_path: 输出合并后的矢量文件路径
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认30米)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量
|
||||
if len(file_paths) != 4:
|
||||
raise ValueError("请提供正好4个矢量文件路径")
|
||||
|
||||
# 定义类别映射,按照指定顺序
|
||||
categories = ['bareland', 'grass', 'shrub', 'building']
|
||||
category_values = {'bareland': 1, 'grass': 2, 'shrub': 3, 'building': 4} # bareland=1, grass=2, shrub=3, building=4
|
||||
|
||||
# 读取所有矢量文件并添加分类
|
||||
gdfs = []
|
||||
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
|
||||
print(f"正在读取第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path)
|
||||
gdf['category'] = category # 添加类别字段
|
||||
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 检查和统一坐标系(以第一个图层为基准)
|
||||
base_crs = gdfs[0].crs
|
||||
for i, gdf in enumerate(gdfs[1:], 2):
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdfs[i - 1] = gdf.to_crs(base_crs)
|
||||
|
||||
# 合并所有GeoDataFrame
|
||||
print("正在合并图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
|
||||
merged_gdf = merged_gdf.set_geometry('geometry')
|
||||
|
||||
# 保存合并后的矢量文件
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path)
|
||||
|
||||
# 计算栅格化的范围
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
# 创建栅格化变换
|
||||
transform = rasterio.transform.from_bounds(
|
||||
bounds[0], bounds[1], bounds[2], bounds[3], width, height
|
||||
)
|
||||
|
||||
# 栅格化:将几何对象转为栅格,使用'value'字段作为像素值
|
||||
print("正在将矢量转为栅格...")
|
||||
shapes = ((mapping(geom), value) for geom, value in zip(merged_gdf.geometry, merged_gdf['value']))
|
||||
raster = rasterize(
|
||||
shapes=shapes,
|
||||
out_shape=(height, width),
|
||||
transform=transform,
|
||||
fill=0, # 背景值为0
|
||||
dtype=rasterio.uint8
|
||||
)
|
||||
|
||||
# 保存栅格文件
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=height,
|
||||
width=width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=base_crs,
|
||||
transform=transform,
|
||||
nodata=0 # 设置nodata值为0,表示背景
|
||||
) as dst:
|
||||
dst.write(raster, 1)
|
||||
|
||||
print("处理完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 0=背景, 1=bareland, 2=grass, 3=shrub, 4=building")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
# 输入4个矢量文件路径(按顺序: bareland, grass, shrub, building)
|
||||
input_files = [
|
||||
r"E:\njds\shenyang\shengyangshp\bareland\bareland.shp",
|
||||
r"E:\njds\shenyang\shengyangshp\grass\grass2.shp",
|
||||
r"E:\njds\shenyang\shengyangshp\shrub\shrub.shp",
|
||||
r"E:\Data\z18\sd\Total_sd\shandonghebingshp\sdshp\jianzhu\true\shandongjianzhu3.shp"
|
||||
]
|
||||
output_vector_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_vector_four.shp"
|
||||
output_raster_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_raster_four.tif"
|
||||
|
||||
# 执行合并和栅格化
|
||||
merge_and_rasterize_vectors_four_categories(input_files, output_vector_file, output_raster_file,
|
||||
raster_resolution=10)
|
|
@ -0,0 +1,112 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping
|
||||
|
||||
|
||||
def merge_and_rasterize_vectors_three_categories(file_paths, output_vector_path, output_raster_path,
|
||||
raster_resolution=30):
|
||||
"""
|
||||
合并3个矢量图层(bareland, grass, shrub),转为栅格并显示3个类别,保留0值作为背景
|
||||
|
||||
参数:
|
||||
file_paths: 包含3个矢量文件路径的列表 (按顺序: bareland, grass, shrub)
|
||||
output_vector_path: 输出合并后的矢量文件路径
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认30米)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量
|
||||
if len(file_paths) != 3:
|
||||
raise ValueError("请提供正好3个矢量文件路径")
|
||||
|
||||
# 定义类别映射,按照指定顺序
|
||||
categories = ['bareland', 'grass', 'shrub']
|
||||
category_values = {'bareland': 1, 'grass': 2, 'shrub': 3} # bareland=1, grass=2, shrub=3
|
||||
|
||||
# 读取所有矢量文件并添加分类
|
||||
gdfs = []
|
||||
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
|
||||
print(f"正在读取第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path)
|
||||
gdf['category'] = category # 添加类别字段
|
||||
gdf['value'] = category_values[category] # 添加数值字段用于栅格化
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 检查和统一坐标系(以第一个图层为基准)
|
||||
base_crs = gdfs[0].crs
|
||||
for i, gdf in enumerate(gdfs[1:], 2):
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdfs[i - 1] = gdf.to_crs(base_crs)
|
||||
|
||||
# 合并所有GeoDataFrame
|
||||
print("正在合并图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
|
||||
merged_gdf = merged_gdf.set_geometry('geometry')
|
||||
|
||||
# 保存合并后的矢量文件
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path)
|
||||
|
||||
# 计算栅格化的范围
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
# 创建栅格化变换
|
||||
transform = rasterio.transform.from_bounds(
|
||||
bounds[0], bounds[1], bounds[2], bounds[3], width, height
|
||||
)
|
||||
|
||||
# 栅格化:将几何对象转为栅格,使用'value'字段作为像素值
|
||||
print("正在将矢量转为栅格...")
|
||||
shapes = ((mapping(geom), value) for geom, value in zip(merged_gdf.geometry, merged_gdf['value']))
|
||||
raster = rasterize(
|
||||
shapes=shapes,
|
||||
out_shape=(height, width),
|
||||
transform=transform,
|
||||
fill=0, # 背景值为0
|
||||
dtype=rasterio.uint8
|
||||
)
|
||||
|
||||
# 保存栅格文件
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=height,
|
||||
width=width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=base_crs,
|
||||
transform=transform,
|
||||
nodata=0 # 设置nodata值为0,表示背景
|
||||
) as dst:
|
||||
dst.write(raster, 1)
|
||||
|
||||
print("处理完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 0=背景, 1=bareland, 2=grass, 3=shrub")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
# 输入3个矢量文件路径(按顺序: bareland, grass, shrub)
|
||||
input_files = [
|
||||
r"E:\njds\shenyang\shengyangshp\bareland\bareland.shp",
|
||||
r"E:\njds\shenyang\shengyangshp\grass\grass2.shp",
|
||||
r"E:\njds\shenyang\shengyangshp\shrub\shrub.shp"
|
||||
]
|
||||
output_vector_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_vector_three.shp"
|
||||
output_raster_file = r"E:\njds\shenyang\shengyangshp\kejianshe\merged_raster_three.tif"
|
||||
|
||||
# 执行合并和栅格化
|
||||
merge_and_rasterize_vectors_three_categories(input_files, output_vector_file, output_raster_file,
|
||||
raster_resolution=30)
|
|
@ -0,0 +1,190 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping, box
|
||||
import os
|
||||
from tqdm import tqdm # 导入 tqdm 用于进度条
|
||||
|
||||
|
||||
def merge_vectors_seven_categories(file_paths, output_vector_path):
|
||||
"""
|
||||
合并7个地貌类型矢量图层并保存为GeoPackage格式(去掉cropland)
|
||||
|
||||
参数:
|
||||
file_paths: 包含7个矢量文件路径的列表 (按顺序: forest, water, shrub, wetland, jianzhu, grass, bareland)
|
||||
output_vector_path: 输出合并后的矢量文件路径(使用.gpkg格式)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件数量是否为7
|
||||
if len(file_paths) != 7:
|
||||
raise ValueError("请提供正好7个矢量文件路径")
|
||||
|
||||
for path in file_paths:
|
||||
if not os.path.exists(path):
|
||||
raise FileNotFoundError(f"文件不存在: {path}")
|
||||
|
||||
# 定义7个类别(去掉cropland)
|
||||
categories = ['forest', 'water', 'shrub', 'wetland', 'jianzhu', 'grass', 'bareland']
|
||||
category_values = {cat: i + 1 for i, cat in enumerate(categories)} # forest=1, water=2, ..., bareland=7
|
||||
|
||||
gdfs = []
|
||||
for i, (path, category) in enumerate(zip(file_paths, categories), 1):
|
||||
print(f"正在读取第{i}个矢量文件: {path}")
|
||||
gdf = gpd.read_file(path, engine="pyogrio")
|
||||
gdf['category'] = category
|
||||
gdf['value'] = category_values[category]
|
||||
if 'Shape_Area' in gdf.columns:
|
||||
gdf['Shape_Area'] = gdf['Shape_Area'].clip(upper=1e9)
|
||||
gdfs.append(gdf)
|
||||
|
||||
# 统一坐标系
|
||||
base_crs = gdfs[0].crs
|
||||
for i, gdf in enumerate(gdfs[1:], 2):
|
||||
if gdf.crs != base_crs:
|
||||
print(f"第{i}个图层坐标系不同,正在转换为第一个图层的坐标系...")
|
||||
gdfs[i - 1] = gdf.to_crs(base_crs)
|
||||
|
||||
print("正在合并图层...")
|
||||
merged_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))
|
||||
merged_gdf = merged_gdf.set_geometry('geometry')
|
||||
|
||||
if not output_vector_path.endswith('.gpkg'):
|
||||
output_vector_path = output_vector_path.replace('.shp', '.gpkg')
|
||||
print("输出格式已更改为GeoPackage以支持大文件")
|
||||
print(f"正在保存合并矢量结果到: {output_vector_path}")
|
||||
merged_gdf.to_file(output_vector_path, driver='GPKG', engine="pyogrio")
|
||||
|
||||
print("矢量合并完成!")
|
||||
print(f"合并矢量保存为: {output_vector_path}")
|
||||
return output_vector_path
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def rasterize_vector_by_blocks(input_vector_path, output_raster_path, raster_resolution=30, block_size=5000):
|
||||
"""
|
||||
将合并后的矢量文件分块转为栅格,显示7个类别,保留0值作为背景,添加进度条,不清理数据
|
||||
|
||||
参数:
|
||||
input_vector_path: 输入的合并矢量文件路径(.gpkg格式)
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格的分辨率(默认30米)
|
||||
block_size: 每个分块的宽度和高度(像素单位,默认5000)
|
||||
"""
|
||||
try:
|
||||
# 读取合并后的矢量文件
|
||||
print(f"正在读取合并矢量文件: {input_vector_path}")
|
||||
merged_gdf = gpd.read_file(input_vector_path, engine="pyogrio")
|
||||
|
||||
# 检查 value 字段是否有效
|
||||
if 'value' not in merged_gdf.columns:
|
||||
raise ValueError("矢量文件中缺少 'value' 字段,请确保文件包含正确的分类值")
|
||||
if not merged_gdf['value'].between(1, 7).all():
|
||||
print("警告: 'value' 字段包含无效值(应在 1-7 之间),可能导致数据丢失")
|
||||
merged_gdf = merged_gdf[merged_gdf['value'].between(1, 7)]
|
||||
|
||||
# 按 value 字段排序,优先级高的类别(例如 value 较大)后处理,避免被覆盖
|
||||
print("正在按 'value' 字段排序以确保优先级...")
|
||||
merged_gdf = merged_gdf.sort_values(by='value', ascending=True)
|
||||
|
||||
# 计算总范围和栅格尺寸
|
||||
bounds = merged_gdf.total_bounds # [minx, miny, maxx, maxy]
|
||||
total_width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
total_height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
|
||||
if total_width <= 0 or total_height <= 0:
|
||||
raise ValueError(f"栅格尺寸无效: 宽度={total_width}, 高度={total_height}")
|
||||
|
||||
# 创建栅格变换
|
||||
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], total_width,
|
||||
total_height)
|
||||
|
||||
# 计算总分块数以设置进度条
|
||||
total_blocks = ((total_height + block_size - 1) // block_size) * ((total_width + block_size - 1) // block_size)
|
||||
print(f"总分块数: {total_blocks}")
|
||||
|
||||
# 创建并写入栅格文件,逐块写入以减少内存占用
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=total_height,
|
||||
width=total_width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=merged_gdf.crs,
|
||||
transform=transform,
|
||||
nodata=0
|
||||
) as dst:
|
||||
# 分块处理并添加进度条
|
||||
with tqdm(total=total_blocks, desc="栅格化进度") as pbar:
|
||||
for y in range(0, total_height, block_size):
|
||||
for x in range(0, total_width, block_size):
|
||||
block_height = min(block_size, total_height - y)
|
||||
block_width = min(block_size, total_width - x)
|
||||
|
||||
# 计算当前分块的地理范围
|
||||
block_minx = bounds[0] + x * raster_resolution
|
||||
block_maxy = bounds[3] - y * raster_resolution
|
||||
block_maxx = block_minx + block_width * raster_resolution
|
||||
block_miny = block_maxy - block_height * raster_resolution
|
||||
|
||||
# 创建分块边界框
|
||||
block_bounds = box(block_minx, block_miny, block_maxx, block_maxy)
|
||||
block_gdf = merged_gdf[merged_gdf.geometry.intersects(block_bounds)].copy()
|
||||
|
||||
if block_gdf.empty:
|
||||
pbar.update(1)
|
||||
continue
|
||||
|
||||
# 分块栅格化
|
||||
block_transform = rasterio.transform.from_bounds(
|
||||
block_minx, block_miny, block_maxx, block_maxy, block_width, block_height
|
||||
)
|
||||
shapes = ((mapping(geom), value) for geom, value in zip(block_gdf.geometry, block_gdf['value']))
|
||||
block_raster = rasterize(
|
||||
shapes=shapes,
|
||||
out_shape=(block_height, block_width),
|
||||
transform=block_transform,
|
||||
fill=0,
|
||||
dtype=rasterio.uint8,
|
||||
all_touched=True # 确保所有触及的像素都被计入,避免小对象丢失
|
||||
)
|
||||
|
||||
# 直接写入当前分块到文件中
|
||||
dst.write(block_raster, 1, window=((y, y + block_height), (x, x + block_width)))
|
||||
print(f"已完成分块栅格化: x={x}, y={y}, 宽度={block_width}, 高度={block_height}")
|
||||
pbar.update(1)
|
||||
|
||||
print("栅格化完成!")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 0=背景, 1=forest, 2=water, 3=shrub, 4=wetland, 5=jianzhu, 6=grass, 7=bareland")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
input_files = [
|
||||
r"sddimaoshp/forest/sdforest.shp",
|
||||
r"sddimaoshp/water/sdwater.shp",
|
||||
r"sddimaoshp/shrub/sdshrub.shp",
|
||||
r"sddimaoshp/wetland/sdwetland.shp",
|
||||
r"sddimaoshp/jianzhu/jianzhu2.shp",
|
||||
r"sddimaoshp/grass/sdgrass.shp",
|
||||
r"sddimaoshp/bareland/sdbareland.shp"
|
||||
]
|
||||
output_vector_file = r"sddimaoshp/hebingtif/shandongdimiao_30m.gpkg"
|
||||
output_raster_file = r"sddimaoshp/hebingtif/sddimao_30m.tif"
|
||||
|
||||
# Step 1: 合并矢量
|
||||
vector_path = merge_vectors_seven_categories(input_files, output_vector_file)
|
||||
|
||||
# Step 2: 如果合并成功,进行分块栅格化
|
||||
if vector_path:
|
||||
rasterize_vector_by_blocks(vector_path, output_raster_file, raster_resolution=30, block_size=5000)
|
|
@ -0,0 +1,153 @@
|
|||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 尝试多种方式导入GDAL
|
||||
try:
|
||||
# 尝试直接导入GDAL
|
||||
import gdal
|
||||
import ogr
|
||||
import osr
|
||||
|
||||
print("成功导入GDAL库")
|
||||
except ImportError:
|
||||
try:
|
||||
# 尝试从osgeo导入
|
||||
from osgeo import gdal, ogr, osr
|
||||
|
||||
print("从osgeo成功导入GDAL库")
|
||||
except ImportError:
|
||||
print("无法导入GDAL库,请确保已安装GDAL及其Python绑定")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def process_rasters(shp_path, raster_path, output_path):
|
||||
"""
|
||||
使用shp文件裁剪栅格,保留不重叠部分设为value=8,然后与原栅格合并
|
||||
|
||||
参数:
|
||||
shp_path: shapefile的路径
|
||||
raster_path: 原始栅格文件路径 (value 1-7)
|
||||
output_path: 输出栅格文件路径
|
||||
"""
|
||||
try:
|
||||
# 设置GDAL错误处理
|
||||
gdal.UseExceptions()
|
||||
|
||||
# 打开栅格文件
|
||||
raster = gdal.Open(raster_path)
|
||||
if raster is None:
|
||||
print(f"无法打开栅格文件: {raster_path}")
|
||||
return None
|
||||
|
||||
# 获取栅格信息
|
||||
print(f"栅格大小: {raster.RasterXSize} x {raster.RasterYSize}, {raster.RasterCount} 波段")
|
||||
geo_transform = raster.GetGeoTransform()
|
||||
projection = raster.GetProjection()
|
||||
print(f"栅格投影: {projection}")
|
||||
|
||||
# 读取栅格数据
|
||||
band = raster.GetRasterBand(1)
|
||||
raster_data = band.ReadAsArray()
|
||||
nodata = band.GetNoDataValue()
|
||||
if nodata is None:
|
||||
nodata = 0
|
||||
|
||||
# 打开shapefile
|
||||
driver = ogr.GetDriverByName("ESRI Shapefile")
|
||||
try:
|
||||
vector = driver.Open(shp_path, 0) # 0表示只读模式
|
||||
except Exception as e:
|
||||
print(f"无法打开shapefile: {e}")
|
||||
return None
|
||||
|
||||
if vector is None:
|
||||
print(f"无法打开shapefile: {shp_path}")
|
||||
return None
|
||||
|
||||
# 获取shapefile信息
|
||||
layer = vector.GetLayer()
|
||||
feature_count = layer.GetFeatureCount()
|
||||
print(f"Shapefile有 {feature_count} 个要素")
|
||||
|
||||
# 创建栅格掩码
|
||||
# 使用GDAL内置的栅格化功能将shapefile转换为栅格
|
||||
print("将shapefile栅格化...")
|
||||
memory_driver = gdal.GetDriverByName('MEM')
|
||||
mask_ds = memory_driver.Create('', raster.RasterXSize, raster.RasterYSize, 1, gdal.GDT_Byte)
|
||||
mask_ds.SetGeoTransform(geo_transform)
|
||||
mask_ds.SetProjection(projection)
|
||||
|
||||
# 栅格化
|
||||
mask_band = mask_ds.GetRasterBand(1)
|
||||
mask_band.Fill(0) # 初始化为0
|
||||
|
||||
# 将shapefile栅格化到掩码上
|
||||
gdal.RasterizeLayer(mask_ds, [1], layer, burn_values=[1])
|
||||
|
||||
# 读取掩码数据
|
||||
mask_data = mask_band.ReadAsArray()
|
||||
|
||||
# 栅格有值区域的掩码 (value 1-7)
|
||||
valid_data_mask = (raster_data >= 1) & (raster_data <= 7)
|
||||
|
||||
# 获取被shapefile覆盖但不与原始有效栅格区域重叠的部分
|
||||
shp_mask = mask_data > 0 # shapefile 覆盖区域
|
||||
non_overlapping_mask = shp_mask & (~valid_data_mask) # shapefile 覆盖但不与原始栅格重叠区域
|
||||
|
||||
# 合并两个栅格
|
||||
merged_raster = raster_data.copy()
|
||||
merged_raster[non_overlapping_mask] = 8
|
||||
|
||||
# 创建输出栅格
|
||||
driver = gdal.GetDriverByName('GTiff')
|
||||
out_ds = driver.Create(output_path, raster.RasterXSize, raster.RasterYSize, 1, band.DataType)
|
||||
out_ds.SetGeoTransform(geo_transform)
|
||||
out_ds.SetProjection(projection)
|
||||
|
||||
# 写入数据
|
||||
out_band = out_ds.GetRasterBand(1)
|
||||
out_band.WriteArray(merged_raster)
|
||||
out_band.SetNoDataValue(nodata)
|
||||
|
||||
# 关闭数据集
|
||||
out_ds = None
|
||||
mask_ds = None
|
||||
raster = None
|
||||
vector = None
|
||||
|
||||
print(f"处理完成,结果保存至: {output_path}")
|
||||
|
||||
# 返回处理后的统计信息
|
||||
stats = {
|
||||
"原始栅格有效区域(value 1-7)像素数": int(np.sum(valid_data_mask)),
|
||||
"shapefile覆盖区域像素数": int(np.sum(shp_mask)),
|
||||
"不重叠区域(value=8)像素数": int(np.sum(non_overlapping_mask)),
|
||||
"结果栅格总有效像素数": int(np.sum(merged_raster > 0))
|
||||
}
|
||||
|
||||
return stats
|
||||
except Exception as e:
|
||||
print(f"发生错误: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return None
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
# 输入文件路径
|
||||
shp_path = r"E:\Data\z18\shandongshp\sd.shp"
|
||||
raster_path = r"E:\Data\z18\sd\sd_y10m\sddimao_10m.tif"
|
||||
output_path = r"E:\Data\z18\sd\sd_10m\sddimao_10mmerged.tif"
|
||||
|
||||
# 执行处理
|
||||
stats = process_rasters(shp_path, raster_path, output_path)
|
||||
|
||||
# 打印统计信息
|
||||
if stats:
|
||||
for key, value in stats.items():
|
||||
print(f"{key}: {value}")
|
||||
else:
|
||||
print("处理失败,无统计信息可显示")
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import rasterio
|
||||
from rasterio.features import rasterize
|
||||
from rasterio.windows import Window
|
||||
import numpy as np
|
||||
from shapely.geometry import mapping
|
||||
import os
|
||||
|
||||
def large_vector_to_raster(input_shp_path, output_raster_path, raster_resolution=30, chunk_size=1000, window_size=1000, category_field='value', category_value=1):
|
||||
"""
|
||||
将大型面要素转为栅格数据,分块读取矢量并按窗口写入栅格,适用于大文件(例如2GB)
|
||||
|
||||
参数:
|
||||
input_shp_path: 输入shapefile路径
|
||||
output_raster_path: 输出栅格文件路径
|
||||
raster_resolution: 输出栅格分辨率(单位:米,默认30)
|
||||
chunk_size: 每次读取的矢量特征数量(默认1000)
|
||||
window_size: 每个栅格窗口的像素大小(默认1000x1000)
|
||||
category_field: 矢量数据中用于栅格化的字段名(默认'value')
|
||||
category_value: 默认类别值(若无字段,则所有特征赋此值,默认1)
|
||||
"""
|
||||
try:
|
||||
# 检查输入文件是否存在
|
||||
if not os.path.exists(input_shp_path):
|
||||
raise FileNotFoundError(f"输入文件 {input_shp_path} 不存在")
|
||||
|
||||
# 读取矢量文件的元信息以确定范围和CRS
|
||||
print("正在读取矢量元信息...")
|
||||
gdf_meta = gpd.read_file(input_shp_path, rows=1) # 只读一行获取元数据
|
||||
base_crs = gdf_meta.crs
|
||||
bounds = gdf_meta.total_bounds # [minx, miny, maxx, maxy]
|
||||
|
||||
# 计算栅格尺寸
|
||||
width = int((bounds[2] - bounds[0]) / raster_resolution)
|
||||
height = int((bounds[3] - bounds[1]) / raster_resolution)
|
||||
transform = rasterio.transform.from_bounds(bounds[0], bounds[1], bounds[2], bounds[3], width, height)
|
||||
|
||||
# 初始化输出栅格文件
|
||||
print(f"初始化输出栅格文件: {output_raster_path}")
|
||||
with rasterio.open(
|
||||
output_raster_path,
|
||||
'w',
|
||||
driver='GTiff',
|
||||
height=height,
|
||||
width=width,
|
||||
count=1,
|
||||
dtype=rasterio.uint8,
|
||||
crs=base_crs,
|
||||
transform=transform,
|
||||
nodata=0 # 背景值为0
|
||||
) as dst:
|
||||
# 分窗口处理
|
||||
for row_offset in range(0, height, window_size):
|
||||
for col_offset in range(0, width, window_size):
|
||||
window_height = min(window_size, height - row_offset)
|
||||
window_width = min(window_size, width - col_offset)
|
||||
window = Window(col_offset, row_offset, window_width, window_height)
|
||||
|
||||
# 计算当前窗口的地理范围
|
||||
window_transform = rasterio.windows.transform(window, transform)
|
||||
window_bounds = rasterio.windows.bounds(window, transform)
|
||||
|
||||
# 创建窗口的边界几何,用于筛选矢量数据
|
||||
window_poly = gpd.GeoSeries.from_wkt([f"POLYGON(({window_bounds[0]} {window_bounds[1]}, "
|
||||
f"{window_bounds[2]} {window_bounds[1]}, "
|
||||
f"{window_bounds[2]} {window_bounds[3]}, "
|
||||
f"{window_bounds[0]} {window_bounds[3]}, "
|
||||
f"{window_bounds[0]} {window_bounds[1]}))"]).iloc[0]
|
||||
|
||||
# 分块读取矢量数据并筛选当前窗口内的特征
|
||||
print(f"处理窗口: row={row_offset}, col={col_offset}")
|
||||
chunk_raster = np.zeros((window_height, window_width), dtype=rasterio.uint8)
|
||||
chunk_iterator = gpd.read_file(input_shp_path, chunksize=chunk_size)
|
||||
|
||||
for chunk_idx, chunk_gdf in enumerate(chunk_iterator):
|
||||
# 筛选与当前窗口相交的特征
|
||||
window_gdf = chunk_gdf[chunk_gdf.intersects(window_poly)]
|
||||
if len(window_gdf) == 0:
|
||||
continue
|
||||
|
||||
print(f" - 处理分块 {chunk_idx},筛选出 {len(window_gdf)} 个特征")
|
||||
# 如果没有指定category_field,则赋默认值
|
||||
if category_field not in window_gdf.columns:
|
||||
window_gdf['value'] = category_value
|
||||
|
||||
# 栅格化当前分块
|
||||
shapes = ((mapping(geom), value) for geom, value in zip(window_gdf.geometry, window_gdf[category_field]))
|
||||
temp_raster = rasterize(
|
||||
shapes=shapes,
|
||||
out_shape=(window_height, window_width),
|
||||
transform=window_transform,
|
||||
fill=0,
|
||||
dtype=rasterio.uint8
|
||||
)
|
||||
|
||||
# 更新窗口栅格(保留非0值)
|
||||
mask = temp_raster > 0
|
||||
chunk_raster[mask] = temp_raster[mask]
|
||||
|
||||
# 写入当前窗口到栅格文件
|
||||
if np.any(chunk_raster > 0):
|
||||
dst.write(chunk_raster, 1, window=window)
|
||||
|
||||
print("处理完成!")
|
||||
print(f"栅格保存为: {output_raster_path}")
|
||||
print(f"栅格中类别值: 0=背景, 其他值由 {category_field} 字段或默认值 {category_value} 确定")
|
||||
|
||||
except Exception as e:
|
||||
print(f"发生错误: {str(e)}")
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
|
||||
input_shp = r"E:\large_data\large_vector.shp" # 替换为您的2GB矢量文件路径
|
||||
output_raster = r"E:\large_data\large_raster.tif" # 输出栅格路径
|
||||
large_vector_to_raster(
|
||||
input_shp_path=input_shp,
|
||||
output_raster_path=output_raster,
|
||||
raster_resolution=30, # 分辨率30米
|
||||
chunk_size=1000, # 每次读取1000个特征
|
||||
window_size=1000, # 每个窗口1000x1000像素
|
||||
category_field='value', # 假设矢量数据中有'value'字段,若无则用默认值
|
||||
category_value=1 # 默认类别值
|
||||
)
|
Loading…
Reference in New Issue