Geopandas: how to plot countries/cities?
From the code you've posted I can't see anything wrong with the plotting, so I assume that the issue might be somewhere in your data aggregation or merging.
Here is a solution that starts by generating data which should be similar to yours, then computes each country's share of the dataset (the number of rows for that country as a percentage of the total number of rows), as this is the required metric. We'll use just a few countries as an example:
from random import choices
import pandas as pd
import numpy as np
def generate_data(k=100, countries_of_interest=None, start_yr=2010, end_yr=2021):
    """Build a random sample DataFrame of country codes and years.

    Args:
        k: Number of rows to generate.
        countries_of_interest: Country codes to sample from, with
            replacement. Defaults to
            ['USA', 'ARG', 'BRA', 'GBR', 'ESP', 'RUS'].
        start_yr: Lowest year that can be drawn (inclusive).
        end_yr: Upper bound for the year. It is exclusive because it is
            passed as the ``high`` argument of ``np.random.randint``, so
            the defaults yield years 2010..2020.

    Returns:
        A DataFrame with the columns 'Country' and 'Year' and an integer
        index running from 0 to k - 1.
    """
    if countries_of_interest is None:
        countries_of_interest = ['USA', 'ARG', 'BRA', 'GBR', 'ESP', 'RUS']
    # random.choices needs an indexable population, so accept any iterable.
    countries = choices(list(countries_of_interest), k=k)
    years = np.random.randint(start_yr, end_yr, k)
    return pd.DataFrame({'Country': countries, 'Year': years},
                        index=range(len(countries)))
def aggregate_data(df, country_col='Country'):
    """Compute each country's share of the dataset, in percent.

    Rows are counted with ``groupby(...).size()``, so the result does not
    depend on which other columns exist or on whether they contain
    missing values.

    Args:
        df: DataFrame with one row per observation and a column holding
            the country identifier.
        country_col: Name of the column to group by.

    Returns:
        A DataFrame with two columns: ``country_col`` and
        'proportion_of_dataset' (rows for that country * 100 / len(df)).
    """
    rows_per_country = df.groupby(country_col).size()
    share = rows_per_country * 100.0 / len(df)
    return share.rename('proportion_of_dataset').reset_index()
# Build the random sample dataset. The rows shown below are one example
# run; the values differ between runs because the data is drawn randomly.
df = generate_data()
# Country Year
# 0 USA 2017
# 1 GBR 2014
# 2 USA 2013
# 3 BRA 2016
# 4 BRA 2018
# .. ... ...
# 95 ESP 2014
# 96 USA 2015
# 97 RUS 2019
# 98 RUS 2012
# 99 RUS 2011
#
# [100 rows x 2 columns]
# Share of the dataset per country, expressed in percent (sample output).
data = aggregate_data(df)
# Country proportion_of_dataset
# 0 ARG 20.0
# 1 BRA 17.0
# 2 ESP 14.0
# 3 GBR 14.0
# 4 RUS 19.0
# 5 USA 16.0
Now load the country border shapefile using geopandas, and rename columns:
import geopandas as gpd
shapefile = 'path_to_shapfile_folder/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'
# Read the country borders, keep only the three columns we need and give
# the two attribute columns shorter names.
gdf = (
    gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]
    .rename(columns={'ADMIN': 'country', 'ADM0_A3': 'country_code'})
)
gdf.head()
# country country_code \
# 0 Fiji FJI
# 1 United Republic of Tanzania TZA
# 2 Western Sahara SAH
# 3 Canada CAN
# 4 United States of America USA
#
# geometry
# 0 MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
# 1 POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
# 2 POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
# 3 MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
# 4 MULTIPOLYGON (((-122.84000 49.00000, -120.0000...
Now we want to merge the country polygon dataframe with our aggregated data. Note: we want to do a left join (on the full country polygon dataframe) so that we include all countries, even ones we don't have data for. Also note that countries without data end up with NaN after the join, so we replace those missing values with zeros:
# Left join keeps every country polygon; countries with no matching row in
# `data` get NaN for the proportion, which is then replaced by 0.
merged = gdf.merge(
    data,
    how='left',
    left_on='country_code',
    right_on='Country',
).assign(
    proportion_of_dataset=lambda frame: frame['proportion_of_dataset'].fillna(0)
)
Using your code to create the geojson:
import json
# Serialize the merged frame to a JSON string and parse it into Python objects.
merged_json = json.loads(merged.to_json())
# Re-serialize to the JSON string that is later passed to plot_map.
# NOTE(review): merged.to_json() already yields a string, so this
# loads/dumps round trip looks redundant -- confirm before simplifying.
json_data = json.dumps(merged_json)
Finally, we'll put your plotting code in a function, and pass in as arguments the geojson, column to plot, and the plot title:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
def plot_map(json_data, plot_col, title):
    """Draw a choropleth map from GeoJSON and show it in the notebook.

    Args:
        json_data: GeoJSON string handed to ``GeoJSONDataSource``.
        plot_col: Name of the feature property whose value selects the
            fill colour of each patch.
        title: Title displayed on the figure.

    Returns:
        None. The figure is displayed via ``output_notebook()`` and
        ``show()``.
    """
    geosource = GeoJSONDataSource(geojson=json_data)

    # Define a sequential multi-hue color palette and reverse its order
    # before values are mapped onto it.
    palette = brewer['YlGnBu'][8]
    palette = palette[::-1]

    # Values are mapped linearly onto the palette over the range 0..40.
    color_mapper = LinearColorMapper(palette=palette, low=0, high=40)

    # Percent labels for the colour-bar ticks; the top tick reads '>40%'.
    tick_labels = {'0': '0%', '5': '5%', '10': '10%', '15': '15%',
                   '20': '20%', '25': '25%', '30': '30%', '35': '35%',
                   '40': '>40%'}
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,
                         width=500, height=20, border_line_color=None,
                         location=(0, 0), orientation='horizontal',
                         major_label_overrides=tick_labels)

    # `plot_height`/`plot_width` were deprecated in Bokeh 2.4 and removed in
    # Bokeh 3.0; `height`/`width` are the supported names.
    p = figure(title=title, height=600, width=950, toolbar_location=None)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # One patch per feature, coloured by `plot_col` through the colour mapper.
    p.patches('xs', 'ys', source=geosource,
              fill_color={'field': plot_col, 'transform': color_mapper},
              line_color='black', line_width=0.25, fill_alpha=1)
    p.add_layout(color_bar, 'below')

    output_notebook()
    # Display figure.
    show(p)
Now all we have to do is call the plotting function, passing in the required parameters:
# Plot the per-country share of the dataset on the world map.
plot_map(
    json_data,
    plot_col='proportion_of_dataset',
    title='Dataset countries of origin',
)