Randomly sample from geopandas DataFrame in Python
Here's another way to do it:
import geopandas as gpd
import numpy as np
# Load an example polygons GeoDataFrame from the bundled 'nybb' sample dataset.
# NOTE(review): `gpd.datasets` is only shipped with older GeoPandas releases —
# confirm the installed version still provides it.
nybb_path = gpd.datasets.get_path('nybb')
gdf_polys = gpd.read_file(nybb_path)
It looks like the following:
# find the bounds of your geodataframe (bounding box of all polygons together)
x_min, y_min, x_max, y_max = gdf_polys.total_bounds
# set sample size: this is the number of candidate points drawn, not the
# number of points that survive the filtering step below
n = 100
# generate random candidate coordinates uniformly within the bounding box
x = np.random.uniform(x_min, x_max, n)
y = np.random.uniform(y_min, y_max, n)
# convert them to a points GeoSeries; reuse the polygons' CRS so the sampled
# points are not left without a coordinate reference system
gdf_points = gpd.GeoSeries(gpd.points_from_xy(x, y), crs=gdf_polys.crs)
# merge all polygons into one geometry once, then only keep the candidates
# that fall within it (rejection sampling: usually fewer than n points remain)
study_area = gdf_polys.unary_union
gdf_points = gdf_points[gdf_points.within(study_area)]
Now you have:
Here's a solution that takes advantage of `MultiPoint` and `MultiPolygon` to avoid loops.
import numpy as np
import geopandas as gpd
import shapely.geometry
def sample_geoseries(geoseries, size, overestimate=2):
    """Draw ``size`` random points located inside the geometries of a GeoSeries.

    Candidate coordinates are drawn uniformly inside the bounding box of the
    union of all geometries and filtered in bulk by intersecting a
    ``MultiPoint`` with that union (rejection sampling without a per-point
    Python test). Batches are drawn until at least ``size`` points have been
    accepted, then exactly ``size`` distinct accepted points are returned.

    Args:
        geoseries: Object exposing ``unary_union`` (e.g. a GeoSeries of
            polygons).
        size: Number of points to return.
        overestimate: Safety factor applied to the expected number of
            candidates needed per batch, so that one batch is usually enough.

    Returns:
        ``numpy.ndarray`` of shape ``(size, 2)`` with one x/y pair per row.

    Raises:
        ValueError: If ``size`` is negative, or if the union of the
            geometries is empty or has zero area (no point could ever be
            accepted).
    """
    if size < 0:
        raise ValueError("size must be non-negative")
    if size == 0:
        return np.empty((0, 2))

    polygon = geoseries.unary_union
    if polygon is None or polygon.is_empty or polygon.area == 0:
        raise ValueError("cannot sample points from an empty or zero-area geometry")

    min_x, min_y, max_x, max_y = polygon.bounds
    # Fraction of the bounding box covered by the geometry, i.e. the expected
    # acceptance rate of a uniformly drawn candidate.
    ratio = polygon.area / polygon.envelope.area
    batch_size = max(1, int(size / ratio * overestimate))

    samples = np.empty((0, 2))
    # Usually a single pass; repeats only if by bad luck too few candidates
    # landed inside the geometry.
    while samples.shape[0] < size:
        candidates = np.random.uniform((min_x, min_y), (max_x, max_y), (batch_size, 2))
        hits = shapely.geometry.MultiPoint(candidates).intersection(polygon)
        # The intersection may be a multi-part geometry, a single Point or an
        # empty geometry; normalise all three to an iterable of points.
        parts = getattr(hits, "geoms", [hits])
        coords = [(part.x, part.y) for part in parts if not part.is_empty]
        if coords:
            samples = np.concatenate([samples, np.array(coords)])

    # More points than requested were likely accepted: pick ``size`` of them
    # without replacement so no point is returned twice.
    return samples[np.random.choice(len(samples), size, replace=False)]
# `bayshp` must hold the path to the shapefile to sample from; it is not
# defined in this snippet.
geodata = gpd.read_file(bayshp)
# `size` is a required argument of sample_geoseries; 100 is an illustrative
# sample count, adjust as needed.
points = sample_geoseries(geodata['geometry'], size=100)
GeoPandas uses Shapely geometries. As far as I know, there is no function that returns random points within a polygon, so you must write one like the one below. Add this script to yours.
from shapely.geometry import Point
import geopandas as gpd
import random
######
def random_points_in_polygon(number, polygon):
    """Return ``number`` random shapely Points that lie inside ``polygon``.

    Points are drawn uniformly inside the polygon's bounding box and kept
    only when the polygon contains them (rejection sampling).

    Args:
        number: How many points to generate; zero or less yields an empty
            list.
        polygon: Geometry exposing ``bounds``, ``contains``, ``is_empty`` and
            ``area`` (e.g. a shapely Polygon or MultiPolygon).

    Returns:
        List of shapely ``Point`` objects of length ``number``.

    Raises:
        ValueError: If points are requested from an empty or zero-area
            geometry, for which the sampling loop could never terminate.
    """
    if number <= 0:
        return []
    if polygon.is_empty or polygon.area == 0:
        raise ValueError("cannot sample points from an empty or zero-area geometry")

    min_x, min_y, max_x, max_y = polygon.bounds
    points = []
    while len(points) < number:
        point = Point(random.uniform(min_x, max_x), random.uniform(min_y, max_y))
        # Candidates outside the polygon are discarded and redrawn.
        if polygon.contains(point):
            points.append(point)
    return points  # returns list of shapely point
######
# Read the layer whose geometries will be sampled.
geodata = gpd.read_file("path/to/your/file.shp")

# Generate 5 points within the geometry of the first row in geodata.
first_polygon = geodata.iloc[0].geometry
points = random_points_in_polygon(5, first_polygon)

# Coordinates of the first point
# print(points[0].x, points[0].y)

# Print the coordinates of all points, numbered from 1.
for position, pt in enumerate(points, start=1):
    print("Point {}: ({},{})".format(position, pt.x, pt.y))
Reference: How to generate random coordinates in a multipolygon in python