superset-frontend/plugins/legacy-plugin-chart-country-map/scripts/Country Map GeoJSON Generator.ipynb
pip install geopandas shapely matplotlib
Download datasets (Admin 0 – Countries in 1:10, and Admin 1 – States, Provinces in 1:10 and 1:50) from Natural Earth Data:
# Dependencies
import os
import json
import requests
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely
import pandas as pd
import shapely.geometry
import shapely.ops
import shapely.affinity
from shapely.geometry import Polygon, MultiPolygon
import shutil
# Set this to True if you want to skip all the rendering of previews in this
# notebook and just get an update of GeoJSON/TS/JSON files.
speed_run = False

# Directory the Natural Earth archives are downloaded to and read from.
data_dir = os.path.expanduser("~/Downloads")
# exist_ok avoids the check-then-create race; missing parents are created too.
os.makedirs(data_dir, exist_ok=True)
def download_files(skip_existing: bool):
    """Download the Natural Earth archives used by this notebook into ``data_dir``.

    Args:
        skip_existing: When True, a file already present in ``data_dir`` whose
            size equals the server-reported ``content-length`` is not written
            again.

    On the first response whose status is not 200 a hint is printed and the
    remaining downloads are abandoned.
    """
    for url in [
        "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip",
        "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_1_states_provinces.zip",
        "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_1_states_provinces.zip"
    ]:
        file_name = url.split('/')[-1]
        full_file_name = f'{data_dir}/{file_name}'
        # temporary fix
        url = url.replace("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download", "https://naciscdn.org/naturalearth")
        with requests.get(
            url,
            headers={
                "accept-encoding": "gzip, deflate, br",
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
            },
            stream=True,
        ) as res:
            # Check the status before touching the headers: an error response
            # is not guaranteed to carry a content-length.
            if res.status_code != 200:
                print("Error downloading files. Please open the URL to download them from browser manually.")
                break
            content_length = res.headers.get('content-length')
            # None means the size is unknown, in which case nothing is skipped.
            file_size = int(content_length) if content_length is not None else None
            if (
                skip_existing and
                file_size is not None and
                os.path.exists(full_file_name) and
                file_size == os.path.getsize(full_file_name)
            ):
                print(f"Skip {file_name} because it already exists")
                continue
            print(f"Downloading {file_name}... \r", end="")
            with open(full_file_name, "wb") as fh:
                fh.write(res.content)
            print("Done. ")
# Fetch the archives. With skip_existing=False the "already exists" shortcut in
# download_files is never taken.
download_files(skip_existing=False)
# Read Natural Earth data files into GeoDataFrames
df_admin0_10m = gpd.read_file(f"{data_dir}/ne_10m_admin_0_countries.zip")
df_10m = gpd.read_file(f"{data_dir}/ne_10m_admin_1_states_provinces.zip")
df_50m = gpd.read_file(f"{data_dir}/ne_50m_admin_1_states_provinces.zip")
# Convert the Admin 0 column names to lowercase
df_admin0_10m.columns = df_admin0_10m.columns.str.lower()
# Notebook display: per-country counts in the 1:50m dataset
df_50m.groupby('admin').count()
# Use 1:50m geometry for some large countries:
print(*df_50m['admin'].unique(), sep='\n')
# Keep the 1:10m rows only for countries absent from the 1:50m dataset, then
# append every 1:50m row.
df = pd.concat([df_10m[~df_10m['admin'].isin(df_50m['admin'].unique())], df_50m])
# Notebook display: inspect all rows/columns of a single country
rdf = df[(df['admin'] == 'Saint Pierre and Miquelon')]
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
rdf
<span style="color: red; font-size: 1.5em">TO SUPPORT NEW COUNTRIES, ADD COUNTRY NAME BELOW</span>
# Country names used in file names
countries = [
'afghanistan',
'aland',
'albania',
'algeria',
'american samoa',
'andorra',
'angola',
'anguilla',
'antarctica',
'antigua and barbuda',
'argentina',
'armenia',
'aruba',
'australia',
'austria',
'azerbaijan',
'the bahamas',
'bahrain',
'bangladesh',
'barbados',
'belarus',
'belgium',
'belize',
'benin',
'bermuda',
'bhutan',
'bolivia',
# 'bonaire, sint eustatius and saba', # Part of Netherlands Antilles, part of Netherlands, according to Natural Earth?
'bosnia and herzegovina',
'botswana',
# 'bouvet island', # part of Norway, in Natural Earth data
'brazil',
'brunei',
'british indian ocean territory',
'bulgaria',
'burkina faso',
'burundi',
'cape verde',
'cambodia',
'cameroon',
'canada',
'cayman islands',
'central african republic',
'chad',
'chile',
'china',
# 'christmas island', # part of British Indian Ocean Territory, according to Natural Earth
# 'cocos (keeling) islands', # part of British Indian Ocean Territory, according to Natural Earth
'colombia',
'comoros',
'democratic republic of the congo',
'cook islands',
'costa rica',
'croatia',
'cuba',
'curaçao',
'cyprus',
'czech republic',
'denmark',
'djibouti',
'dominica',
'dominican republic',
'ecuador',
'egypt',
'el salvador',
'equatorial guinea',
'eritrea',
'estonia',
# 'eswatini', # not sure why this doesn't work — Swaziland isn't available to alias, either.
'ethiopia',
'falkland islands',
'faroe islands',
'fiji',
'finland',
'france',
# 'french guiana',
'french polynesia',
#'french southern and antarctic lands',
'gabon',
'gambia',
'germany',
'ghana',
'gibraltar',
'greece',
'greenland',
'grenada',
# 'guadeloupe', # part of France, in Natural Earth data
'guam',
'guatemala',
'haiti',
'hungary',
'guernsey',
'guinea',
# 'guinea-bissau', # not sure why this isn't working
'guyana',
'honduras',
'iceland',
'india',
'indonesia',
'iran',
'israel',
'italy',
'ivory coast',
'japan',
'jordan',
'kazakhstan',
'kenya',
'korea',
'kuwait',
'kyrgyzstan',
'laos',
'latvia',
'lebanon',
'lesotho',
'liberia',
'libya',
'liechtenstein',
'lithuania',
'luxembourg',
# 'macao', # part of China, in Natural Earth data
'macedonia',
'madagascar',
'malawi',
'malaysia',
'maldives',
'mali',
'malta',
'marshall islands',
# 'martinique', # part of France, in Natural Earth data
'mauritania',
'mauritius',
# 'mayotte', # part of France, in Natural Earth data
'mexico',
'moldova',
'montserrat',
'monaco',
'mongolia',
'montenegro',
'morocco',
'mozambique',
'myanmar',
'namibia',
'nauru',
'nepal',
'netherlands',
'new caledonia',
'new zealand',
'nicaragua',
'niger',
'nigeria',
'niue',
'norfolk island',
'northern mariana islands',
'norway',
'oman',
'pakistan',
'palau',
# 'palestine', # part of Israel, in Natural Earth data
'panama',
'papua new guinea',
'paraguay',
'peru',
# 'pitcairn', # part of UK, in Natural Earth data
'philippines',
'poland',
'portugal',
'puerto rico',
'qatar',
# 'réunion', # part of France, in Natural Earth data
'republic of serbia',
'romania',
'russia',
'rwanda',
'saint barthelemy',
# 'saint helena, ascension and tristan da cunha', # part of UK, in Natural Earth data
'saint lucia',
'saint martin',
#'saint martin (french part)', part of Saint Martin, in Natural Earth data
'saint pierre and miquelon',
'saint vincent and the grenadines',
'samoa',
'san marino',
'sao tome and principe',
'saudi arabia',
'senegal',
'seychelles',
'sierra leone',
'singapore',
# 'sint maarten (dutch part)', part of Saint Martin, in Natural Earth data
'slovakia',
'slovenia',
'solomon islands',
'somalia',
'south africa',
# 'south sudan', # part of Sudan, in Natural Earth data
'spain',
'sri lanka',
'sudan',
'suriname',
# 'svalbard and jan mayen', # part of Norway, in Natural Earth data
'sweden',
'switzerland',
'syria',
'taiwan',
'tajikistan',
'tanzania',
'thailand',
'timorleste',
'togo',
# 'tokelau', # part of New Zealand, in Natural Earth data
'tonga',
'trinidad and tobago',
'tunisia',
'turkey',
'turkmenistan',
'turks and caicos islands',
'tuvalu',
'uganda',
'uk',
'ukraine',
'united arab emirates',
'united states minor outlying islands',
'uruguay',
'usa',
'uzbekistan',
'vanuatu',
'vatican',
'venezuela',
'vietnam',
'british virgin islands',
'united states virgin islands',
'wallis and futuna',
'yemen',
'zambia',
'zimbabwe'
]
# Maps the short country name used in file names (key) to the lowercased name
# used in the dataset's "admin" column (value).
country_name_aliases = {
    "korea": "south korea",
    "uk": "united kingdom",
    "usa": "united states of america",
    "timorleste": "east timor",
    "tanzania": "united republic of tanzania",
}
# Region-level maps that are built later in the notebook:
region_maps = [
    'france_regions',
    'france_overseas',
    'italy_regions',
    'philippines_regions',
    'turkey_regions'
]
# Make sure all country names are covered. The set of dataset names is built
# once instead of being recomputed for every entry of `countries`.
_known_admin_names = set(df["admin"].str.lower().unique())
invalid_countries = [
    x for x in countries
    if (country_name_aliases.get(x, x) not in _known_admin_names) and (x not in region_maps)
]
if invalid_countries:
    print(f"Following country names are not valid: {invalid_countries}")
Preview all countries:
# Keyword arguments shared by the .plot() calls in this notebook.
plot_styles = dict(
    edgecolor='black',  # border colour of each geometry
    column='name',      # column whose values decide the fill colour
    legend=False,       # no legend
    cmap='tab20',       # colormap used for the fill colours
    linewidth=0.25,     # thickness of the geometry borders
)
def get_gdf(country):
    """Return a copy of the GeoDataFrame for ``country``.

    A map registered under ``country`` in ``alt_maps`` takes precedence.
    Otherwise the rows of ``df`` whose lowercased ``admin`` value equals the
    country's dataset name (``country_name_aliases`` applied) are returned.
    """
    # `alt_maps` is only assigned near the end of the notebook. Looking it up
    # dynamically lets this function run before that cell (e.g. for the early
    # preview) instead of raising NameError.
    tweaked_maps = globals().get("alt_maps")
    if tweaked_maps and country in tweaked_maps:
        gdf = tweaked_maps[country]
    else:
        country_alias = country_name_aliases.get(country, country)
        gdf = df[df["admin"].str.lower() == country_alias]
    return gdf.copy()
def plot_all_countries(countries, subplot_width=5, subplot_height=5, base_tolerance=0.01):
    """Draw a preview grid (6 columns) with one subplot per country.

    Args:
        countries: Sized iterable of keys accepted by ``get_gdf`` (a list, or a
            dict whose keys are used).
        subplot_width: Width of each subplot, multiplied into the figure size.
        subplot_height: Height of each subplot, multiplied into the figure size.
        base_tolerance: Base simplification tolerance; it is scaled with the
            country's area (computed in EPSG:6933) before simplifying.
    """
    if not countries:
        print("No countries to plot.")
        return

    num_countries = len(countries)
    ncols = 6  # Set fixed number of columns
    nrows = max((num_countries + ncols - 1) // ncols, 1)  # Ensure at least one row
    figsize = (ncols * subplot_width, nrows * subplot_height)
    print(f"Debug Info: figsize={figsize}, ncols={ncols}, nrows={nrows}, num_countries={num_countries}")

    plt.figure(figsize=figsize)
    plt.rc('font', size=24)  # Sets the font size globally
    try:
        for i, country in enumerate(countries):
            ax = plt.subplot(nrows, ncols, i + 1)
            gdf = get_gdf(country)
            if not gdf.empty:
                gdf_projected = gdf.to_crs(epsg=6933)
                area = gdf_projected['geometry'].area.sum()
                dynamic_tolerance = base_tolerance * (area / 1e6) ** 0.8
                gdf_projected['geometry'] = gdf_projected['geometry'].simplify(tolerance=dynamic_tolerance, preserve_topology=True)
                # Plot the simplified shapes, converted back to the original
                # CRS; previously the simplified frame was computed and then
                # ignored in favour of the unsimplified one.
                gdf_projected.to_crs(gdf.crs).plot(ax=ax, **plot_styles)
                ax.set_aspect('equal', adjustable='datalim')
            else:
                ax.text(0.5, 0.5, country, ha='center', va='center', fontsize=24)
            ax.set_title(country)
        plt.tight_layout()
        plt.show()
    finally:
        # Restore the rc settings even when a plot fails part-way through.
        plt.rcdefaults()
# If you want to see a preview of all countries before they're touched up, just switch this to True:
# It's disabled because it takes a while to run.
plot_preview = False
if plot_preview:
    # plot_all_countries requires the collection of countries to draw.
    plot_all_countries(countries)
# All rows whose adm0_a3 code is 'USA', before any repositioning.
usa = df[df['adm0_a3'] == 'USA']
# Preview, skipped when speed_run is set.
not speed_run and usa.plot(**plot_styles)
def reposition(df, idx, xoff=None, yoff=None, xscale=None, yscale=None, simplify=None):
    """Translate, scale and/or simplify the geometries of the selected rows in place.

    Args:
        df: Frame with a 'geometry' column; the selected rows are overwritten.
        idx: Row selector passed to ``df.loc``.
        xoff, yoff: Translation offsets; a missing one counts as 0.
        xscale, yscale: Scale factors; a missing one counts as 1.
        simplify: Tolerance for ``simplify`` (called with
            ``preserve_topology=False``); skipped when falsy.
    """
    wants_shift = bool(xoff or yoff)
    wants_scale = bool(xscale or yscale)

    def _transform(geom):
        # Order matters: translate first, then scale, then simplify.
        if wants_shift:
            geom = shapely.affinity.translate(geom, xoff or 0, yoff or 0)
        if wants_scale:
            geom = shapely.affinity.scale(geom, xscale or 1, yscale or 1)
        return geom.simplify(simplify, preserve_topology=False) if simplify else geom

    selected = df.loc[idx, 'geometry']
    df.loc[idx, 'geometry'] = selected.apply(_transform)
usa_copy = usa.copy()
# Shift Hawaii by (51, 5.5).
reposition(usa_copy, usa.name == 'Hawaii', 51, 5.5)
# Shift Alaska by (35, -34) and scale it by 0.35 in both directions.
reposition(usa_copy, usa.name == 'Alaska', 35, -34, 0.35, 0.35)
not speed_run and usa_copy.plot(figsize=(8,8), **plot_styles)
China claims sovereignty over Taiwan. For disputed territories, we respect each country and give them what they want.
In addition, Hong Kong and Macau should also be included in a China map.
# Taiwan, Hong Kong and Macau rows from the Admin 0 dataset, restricted to the
# columns that also exist in the Admin 1 frame `df`
china_sars = df_admin0_10m.loc[
    df_admin0_10m.name_en.isin(['Taiwan', 'Hong Kong', 'Macau']),
    [x for x in df_admin0_10m.columns if x in df.columns]
]
# Attach the Chinese names and subdivision codes, matched on name_en
china_sars = china_sars.merge(pd.DataFrame(
    data={
        "name_en": ["Taiwan", "Hong Kong", "Macau"],
        "name_zh": ["中国台湾", "香港特别行政区", "澳门特别行政区"],
        "iso_3166_2": ["CN-71", "CN-91", "CN-92"],
    },
), on="name_en", how="left")
# Notebook display
china_sars
china = df[df.admin == "China"]
china_copy = pd.concat([china, china_sars], ignore_index=True)
# Combine the 'name_zh' columns: fill gaps in 'name_zh' from 'name_zh_y'
china_copy["name_zh"] = china_copy["name_zh"].combine_first(china_copy["name_zh_y"])
# Drop the merge-suffixed 'name_zh_x' and 'name_zh_y' columns
# (both must exist: drop() is called without errors="ignore")
china_copy = china_copy.drop(["name_zh_x", "name_zh_y"], axis=1)
# Plotting the DataFrame
not speed_run and china_copy.plot(**plot_styles)
Note ISO-3166-2:CN has updated subdivisions to use letters instead of numbers (e.g. CN-91 -> CN-HK). We kept the numeric code for backward compatibility.
# Åland row from the Admin 0 dataset, restricted to the columns that also
# exist in the Admin 1 frame `df`
finland_aland = df_admin0_10m.loc[
    df_admin0_10m.name_en.isin(['Åland']),
    [x for x in df_admin0_10m.columns if x in df.columns]
]
# Attach the Finnish name and subdivision code, matched on name_en
finland_aland = finland_aland.merge(pd.DataFrame(
    data={
        "name_en": ["Åland"],
        "name_fi": ["Ahvenanmaan maakunta"],
        "iso_3166_2": ["FI-01"],
    },
), on="name_en", how="left")
finland = df[df.admin == "Finland"]
# Concatenate the 'finland' DataFrame with 'finland_aland' DataFrame
finland_copy = pd.concat([finland, finland_aland], ignore_index=True)
# NOTE(review): this combines 'name_fi' with itself, which leaves the column
# unchanged. If the merge above ever yields 'name_fi_x'/'name_fi_y', those are
# the columns that would need combining — confirm against the data.
finland_copy["name_fi"] = finland_copy["name_fi"].combine_first(finland_copy["name_fi"])
# Drops 'name_fi' entirely, so the Finnish name attached above is not kept.
# NOTE(review): confirm this is intended; the China cell drops only the
# suffixed '_x'/'_y' columns.
finland_copy = finland_copy.drop(["name_fi"], axis=1)
# Plotting the DataFrame
not speed_run and finland_copy.plot(figsize=(7, 7), **plot_styles)
# Rows whose adm0_a3 code is 'RUS'
russia_copy = df[df['adm0_a3'] == 'RUS'].copy()
# From those rows, pick the ones carrying the codes UA-43 and UA-40
crimea = russia_copy[russia_copy['iso_3166_2'] == 'UA-43'].copy()
sevastopol = russia_copy[russia_copy['iso_3166_2'] == 'UA-40'].copy()
# Append them to the rows whose adm0_a3 code is 'UKR'
ukraine_with_crimea = pd.concat([df[df['adm0_a3'] == 'UKR'], crimea, sevastopol], ignore_index=True)
# kyiv = ukraine_with_crimea[ukraine_with_crimea['iso_3166_2'] == 'UA-30']
# kyiv_oblast = ukraine_with_crimea[ukraine_with_crimea['iso_3166_2'] == 'UA-32']
# Update the name of the Kyiv city entry
ukraine_with_crimea.loc[ukraine_with_crimea['iso_3166_2'] == 'UA-30', 'name'] = 'Kyiv'
# Update the name of the Kyiv Oblast entry
ukraine_with_crimea.loc[ukraine_with_crimea['iso_3166_2'] == 'UA-32', 'name'] = 'Kyiv Oblast'
# Plotting the DataFrame
not speed_run and ukraine_with_crimea.plot(figsize=(7,7), **plot_styles)
india = df[df['admin'] == 'India']
india_copy = india.copy()
# Download and load the GeoJSON file for India
india_geojson_url = "https://github.com/geohacker/india/raw/bcb920c7d3c686f01d085f7661c9ba89bf9bf65e/state/india_state_kashmir_ladakh.geojson"
try:
    india_gdf = gpd.read_file(india_geojson_url)
    # Rename column 'ST_ID' to 'iso_3166_2' for consistency
    india_gdf.rename(columns={'ST_ID': 'iso_3166_2'}, inplace=True)
    # Update the geometry for the states of Jammu and Kashmir and Ladakh:
    # the downloaded shapes are dissolved per code and written over the rows
    # with the same code in india_copy
    india_copy.loc[india_copy['iso_3166_2'] == 'IN-JK', 'geometry'] = india_gdf[india_gdf['iso_3166_2'] == 'IN-JK'].dissolve(by='iso_3166_2').reset_index()["geometry"].values
    india_copy.loc[india_copy['iso_3166_2'] == 'IN-LA', 'geometry'] = india_gdf[india_gdf['iso_3166_2'] == 'IN-LA'].dissolve(by='iso_3166_2').reset_index()["geometry"].values
    print("GeoJSON file for India downloaded and loaded successfully.")
except Exception as e:
    # Best effort: any failure is reported and the notebook carries on with
    # whatever india_copy contains at that point
    print(f"Unable to download or load the GeoJSON file for India. Error: {str(e)}")
    print("Please download the file from the URL and try again.")
not speed_run and india_copy.plot(**plot_styles)
# --- Norway ---
norway = df[df['adm0_a3'] == 'NOR']
not speed_run and norway.plot(**plot_styles)
norway_copy = norway.copy()
# Drop the row whose code is "NO-X01~"
norway_copy = norway_copy[norway_copy["iso_3166_2"] != "NO-X01~"]
# Shift Svalbard by (-12, -8) and scale it by 0.5
reposition(norway_copy, norway.name == 'Svalbard', -12, -8, 0.5, 0.5)
#reposition(norway_copy, norway.name == 'Nordland', 10, 0, 2, 2)
not speed_run and norway_copy.plot(**plot_styles)
# --- Portugal ---
portugal = df[df.admin == 'Portugal']
not speed_run and portugal.plot(**plot_styles)
portugal_copy = portugal.copy()
# Shift the Azores by (11, 0); shift Madeira by (6, 2) and simplify it
reposition(portugal_copy, portugal.name == 'Azores', 11, 0)
reposition(portugal_copy, portugal.name == 'Madeira', 6, 2, simplify=0.015)
not speed_run and portugal_copy.plot(figsize=(8, 8), **plot_styles)
# --- Spain ---
spain = df[df.admin == 'Spain']
not speed_run and spain.plot(**plot_styles)
spain_copy = spain.copy()
# Shift Las Palmas and Santa Cruz de Tenerife by (3, 7); the scale of 1 leaves their size unchanged
reposition(spain_copy, spain.name.isin(['Las Palmas', 'Santa Cruz de Tenerife']), 3, 7, 1, 1)
not speed_run and spain_copy.plot(figsize=(8, 8), **plot_styles)
# --- Russia (unmodified preview) ---
russia = df[df.admin == 'Russia']
not speed_run and russia.plot(**plot_styles)
def shift_geom(geom, cutoff=0):
    """Split ``geom`` along the vertical line x=cutoff and move the western pieces east.

    Pieces whose minimum x lies below ``cutoff`` are translated by
    ``360 - cutoff`` along x; the other pieces are kept as they are. The
    pieces are then merged back into a single geometry.
    """
    meridian = shapely.geometry.LineString([(cutoff, -90), (cutoff, 90)])
    pieces = shapely.ops.split(geom, meridian)

    # Anything other than a GeometryCollection is treated as "no split occurred".
    if not isinstance(pieces, shapely.geometry.GeometryCollection):
        return shapely.ops.unary_union([geom])

    relocated = [
        shapely.affinity.translate(piece, xoff=360 - cutoff)
        if piece.bounds[0] < cutoff
        else piece
        for piece in pieces.geoms
    ]
    return shapely.ops.unary_union(relocated)
# Applying the function to the DataFrame
russia_copy = russia.copy()
# Only the 'Chukchi Autonomous Okrug' row is passed through shift_geom
# (with its default cutoff of 0)
russia_copy.loc[
    russia.name == 'Chukchi Autonomous Okrug', 'geometry'
] = russia_copy.loc[
    russia.name == 'Chukchi Autonomous Okrug', 'geometry'
].apply(shift_geom)
# Plotting
not speed_run and russia_copy.plot(figsize=(20, 20), **plot_styles)
# Only the subdivision code and geometry are needed for the Turkey region map
turkey = df[df.admin == 'Turkey'][['iso_3166_2','geometry']]
# This frame has no 'name' column, so the 'column' style entry is left out
not speed_run and turkey.plot(**{key: value for key, value in plot_styles.items() if key != 'column'})
# NUTS-1 codes for Turkey and the iso_3166_2 codes of the cities in each region
region_dict = {
    'TR1': ['TR-34'],
    'TR2': ['TR-59', 'TR-22', 'TR-39', 'TR-10', 'TR-17'],
    'TR3': ['TR-35', 'TR-09', 'TR-20', 'TR-48', 'TR-45', 'TR-03', 'TR-43', 'TR-64'],
    'TR4': ['TR-16', 'TR-26', 'TR-11', 'TR-41', 'TR-54', 'TR-81', 'TR-14', 'TR-77'],
    'TR5': ['TR-06', 'TR-42', 'TR-70'],
    'TR6': ['TR-07', 'TR-32', 'TR-15', 'TR-01', 'TR-33', 'TR-31', 'TR-46', 'TR-80'],
    'TR7': ['TR-71', 'TR-68', 'TR-51', 'TR-50', 'TR-40', 'TR-38', 'TR-58', 'TR-66'],
    'TR8': ['TR-67', 'TR-78', 'TR-74', 'TR-37', 'TR-18', 'TR-57', 'TR-55', 'TR-60', 'TR-19', 'TR-05'],
    'TR9': ['TR-61', 'TR-52', 'TR-28', 'TR-53', 'TR-08', 'TR-29'],
    'TRA': ['TR-25', 'TR-24', 'TR-69', 'TR-04', 'TR-36', 'TR-76', 'TR-75'],
    'TRB': ['TR-44', 'TR-23', 'TR-12', 'TR-62', 'TR-65', 'TR-49', 'TR-13', 'TR-30'],
    'TRC': ['TR-27', 'TR-02', 'TR-79', 'TR-63', 'TR-21', 'TR-47', 'TR-72', 'TR-73', 'TR-56']}
# Region names corresponding to NUTS-1
region_name_dict = {'TR1':'İstanbul',
                    'TR2':'Batı Marmara',
                    'TR3':'Ege',
                    'TR4':'Doğu Marmara',
                    'TR5':'Batı Anadolu',
                    'TR6':'Akdeniz',
                    'TR7':'Orta Anadolu',
                    'TR8':'Batı Karadeniz',
                    'TR9':'Doğu Karadeniz',
                    'TRA':'Kuzeydoğu Anadolu',
                    'TRC':'Güneydoğu Anadolu',
                    'TRB':'Ortadoğu Anadolu'
                    }
def create_region_polygons(region_dict, turkey_gdf):
    """Dissolve city polygons into one polygon per region.

    Args:
        region_dict: Maps a region code to the list of ``iso_3166_2`` codes of
            the cities it contains.
        turkey_gdf: GeoDataFrame with ``iso_3166_2`` and ``geometry`` columns.
            It is not modified.

    Returns:
        The dissolved GeoDataFrame with the region code in a ``REGION`` column
        and the ``iso_3166_2`` column removed.
    """
    # Create a reverse dictionary where city codes map to region codes
    city_to_region = {
        city_code: region_code
        for region_code, city_codes in region_dict.items()
        for city_code in city_codes
    }
    # Work on a copy so the caller's frame does not silently gain a 'REGION'
    # column (the caller passes a filtered slice of the main frame)
    turkey_gdf = turkey_gdf.copy()
    # Create a new column 'REGION' that maps each city to its region
    turkey_gdf['REGION'] = turkey_gdf['iso_3166_2'].map(city_to_region)
    # Dissolve on the 'REGION' column to combine city polygons into region polygons
    region_gdf = turkey_gdf.dissolve(by='REGION')
    # Turn the 'REGION' index back into a regular column
    region_gdf.reset_index(inplace=True)
    return region_gdf.drop(columns=['iso_3166_2'])
turkey_regions = create_region_polygons(region_dict, turkey)
# Rename 'REGION' column to 'iso_3166_2'
turkey_regions = turkey_regions.rename(columns={'REGION': 'iso_3166_2'})
# Map the region_name_dict to a new 'name' column
turkey_regions['name'] = turkey_regions['iso_3166_2'].map(region_name_dict)
not speed_run and turkey_regions.plot(figsize=(10, 7), **plot_styles)
# Rows of France as found in the dataset, before any fixes or repositioning
france = df[df.admin == 'France']
not speed_run and france.plot(**plot_styles)
Move the Overseas departments and regions of France closer to mainland.
Fix some department names and region codes
def replace_column(column, df, old, new):
    """Replace every occurrence of ``old`` in ``df[column]`` with ``new``, in place.

    Nothing is written when ``old`` does not occur in the column.
    """
    hits = df[column] == old
    if not hits.any():
        return
    df.loc[hits, column] = new
# Fix two misspelled department names and a region code with a trailing tab
replace_column('name', france, 'Seien-et-Marne', 'Seine-et-Marne')
replace_column('name', france, 'Haute-Rhin', 'Haut-Rhin')
replace_column('region_cod', france, 'FR-IDF\t', 'FR-IDF')
france_copy = france.copy()
# Shift and scale the overseas departments (arguments: xoff, yoff, xscale, yscale)
reposition(france_copy, france.name=='Guadeloupe', 57.4, 25.4, 1.5, 1.5)
reposition(france_copy, france.name=='Martinique', 58.4, 27.1, 1.5, 1.5)
reposition(france_copy, france.name=='Guyane française', 52, 37.7, 0.35, 0.35)
reposition(france_copy, france.name=='La Réunion', -55, 62.8, 1.5, 1.5)
reposition(france_copy, france.name=='Mayotte', -43, 54.3, 1.5, 1.5)
not speed_run and france_copy.plot(figsize=(8, 8), **plot_styles)
# Region-level map: dissolve the repositioned departments by region code and name
france_regions = france_copy[['geometry','region_cod','region']]
france_regions = france_regions.dissolve(by=['region_cod', 'region']).reset_index()
# Expose the region name and code under the column names used by the other maps
france_regions = france_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})
not speed_run and france_regions.plot(figsize=(10, 7), **plot_styles)
This step creates a map of France with Overseas, in a friendly layout to see all territories and make them easy to see and interact with.
france_overseas = france.copy()
# Shift and scale the overseas departments (arguments: xoff, yoff, xscale, yscale)
reposition(france_overseas, france.name=='Guadeloupe', 53.2, 29, 1.5, 1.5)
reposition(france_overseas, france.name=='Martinique', 52.8, 27.5, 1.5, 1.5)
reposition(france_overseas, france.name=='Guyane française', 45, 35.5, 0.3, 0.3)
reposition(france_overseas, france.name=='La Réunion', -58.2, 60.5, 1.5, 1.5)
reposition(france_overseas, france.name=='Mayotte', -50.5, 52.2, 2, 2)

# Tahiti
# Explicit copy: the geometry is overwritten below, which must not be done on
# a filtered slice of `df`.
tahiti_data = df[(df['admin'] == 'French Polynesia') & (df['name'] == 'Windward Islands')].copy()
# Remove Rimatuu to avoid confusion with Corsica when displayed on the map
# (drops the polygon at position 1 of the multipolygon)
windward_geom = tahiti_data.iloc[0].geometry
filtered_geom = MultiPolygon([geom for i, geom in enumerate(windward_geom.geoms) if i != 1])
# Update the geometry in the tahiti_data DataFrame
tahiti_data.at[tahiti_data.index[0], 'geometry'] = filtered_geom
france_overseas = pd.concat([france_overseas, tahiti_data], ignore_index=True)
reposition(france_overseas, france_overseas.name=='Windward Islands', 158.2, 57.3, 2, 2)

# Kerguelen
kerguelen_data = df[(df['admin'] == 'French Southern and Antarctic Lands') & (df['name'] == 'Archipel des Kerguelen')]
france_overseas = pd.concat([france_overseas, kerguelen_data], ignore_index=True)
reposition(france_overseas, france_overseas.name=='Archipel des Kerguelen', -63.5, 88.5, 0.9, 0.9)

# Wallis and Futuna
# Explicit copy: reposition() writes into the frame it is given.
wallis_futuna_data = df[(df['admin'] == 'Wallis and Futuna') & (df['name'].isin(['Alo', '`Uvea']))].copy()
reposition(wallis_futuna_data, wallis_futuna_data.name=='Alo', 11.3, 1.1)
reposition(wallis_futuna_data, wallis_futuna_data.name=='`Uvea', 9.5, 0.2)
wallis_futuna_merged = wallis_futuna_data.dissolve(by='admin').reset_index()
france_overseas = pd.concat([france_overseas, wallis_futuna_merged], ignore_index=True)
reposition(france_overseas, france_overseas.admin=='Wallis and Futuna', 170, 52.5, 4, 4)

# New Caledonia
new_caledonia_data = df[(df['admin'] == 'New Caledonia')]
new_caledonia_merged = new_caledonia_data.dissolve(by='admin').reset_index()
france_overseas = pd.concat([france_overseas, new_caledonia_merged], ignore_index=True)
reposition(france_overseas, france_overseas.admin=='New Caledonia', -165.5, 60.4, 0.4, 0.4)

# Saint Pierre and Miquelon
saint_pierre_and_miquelon_data = df[((df['admin'] == 'Saint Pierre and Miquelon'))]
saint_pierre_and_miquelon_merged = saint_pierre_and_miquelon_data.dissolve(by='admin').reset_index()
france_overseas = pd.concat([france_overseas, saint_pierre_and_miquelon_merged], ignore_index=True)
reposition(france_overseas, france_overseas.admin=='Saint Pierre and Miquelon', 48, 4, 3, 3)

# Saint Martin
saint_martin_data = df[(df['admin'] == 'Saint Martin')]
france_overseas = pd.concat([france_overseas, saint_martin_data], ignore_index=True)
reposition(france_overseas, france_overseas.admin=='Saint Martin', 54.8, 30.3, 5, 5)

# Saint Barthélémy
saint_barthelemy_data = df[(df['admin'] == 'Saint Barthelemy')]
france_overseas = pd.concat([france_overseas, saint_barthelemy_data], ignore_index=True)
reposition(france_overseas, france_overseas.admin=='Saint Barthelemy', 54.5, 30, 8, 8)

# Reposition Paris, and Departements 92 93 94 so that we can actually see them.
# The four shapes are grouped into one MultiPolygon so that they are moved and
# scaled together, then split back into one geometry per row.
paris_and_littlecrowndpts = france_overseas[france_overseas['name'].isin(['Paris', 'Hauts-de-Seine', 'Seine-Saint-Denis', 'Val-de-Marne'])]
grouped_geometry = MultiPolygon(paris_and_littlecrowndpts['geometry'].tolist())
grouped_geometry_transformed = shapely.affinity.scale(shapely.affinity.translate(grouped_geometry, xoff=6.3, yoff=2.3), xfact=3, yfact=3)
transformed_geometries = list(grouped_geometry_transformed.geoms)
paris_and_littlecrowndpts_copy = paris_and_littlecrowndpts.copy()
paris_and_littlecrowndpts_copy['geometry'] = transformed_geometries
france_overseas = france_overseas[~france_overseas['name'].isin(['Paris', 'Hauts-de-Seine', 'Seine-Saint-Denis', 'Val-de-Marne'])]
france_overseas = pd.concat([france_overseas, paris_and_littlecrowndpts_copy], ignore_index=True)

# Update metadata properly
france_overseas.loc[france_overseas['name'] == 'Windward Islands', ['name', 'iso_3166_2']] = ['Polynésie française', 'FR-PF']
france_overseas.loc[france_overseas['name'] == 'Archipel des Kerguelen', ['name', 'iso_3166_2']] = ['Terres australes et antarctiques françaises', 'FR-TF']
france_overseas.loc[france_overseas['admin'] == 'Wallis and Futuna', ['name', 'iso_3166_2']] = ['Wallis et Futuna', 'FR-WF']
france_overseas.loc[france_overseas['admin'] == 'New Caledonia', ['name', 'iso_3166_2']] = ['Nouvelle-Calédonie', 'FR-NC']
france_overseas.loc[france_overseas['admin'] == 'Saint Pierre and Miquelon', ['name', 'iso_3166_2']] = ['Saint-Pierre-et-Miquelon', 'FR-PM']
france_overseas.loc[france_overseas['admin'] == 'Saint Martin', ['name', 'iso_3166_2']] = ['Saint-Martin', 'FR-MF']
france_overseas.loc[france_overseas['admin'] == 'Saint Barthelemy', ['name', 'iso_3166_2']] = ['Saint-Barthélémy', 'FR-BL']

# Plot data
# NOTE(review): this rename only has an effect if 'NAME_1'/'ISO' columns exist
# in the frame — confirm whether it is still needed.
france_overseas = france_overseas.rename(columns={'NAME_1': 'name','ISO': 'iso_3166_2'})
not speed_run and france_overseas.plot(figsize=(15, 15), **plot_styles)
# Region-level map of Italy: dissolve the rows by region code and name
italy_regions = df[df.admin == 'Italy'][['geometry','region_cod','region']]
italy_regions = italy_regions.dissolve(by=['region_cod', 'region']).reset_index()
# Expose the region name and code under the column names used by the other maps
italy_regions = italy_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})
not speed_run and italy_regions.plot(figsize=(10, 7), **plot_styles)
def apply_bounds(df, northwest, southeast):
    """Return the rows of a copy of ``df`` whose geometry is contained in a rectangle.

    Args:
        df: Frame with a ``geometry`` attribute; it is not modified.
        northwest: ``(x, y)`` of one corner of the rectangle.
        southeast: ``(x, y)`` of the opposite corner.
    """
    west, north = northwest
    east, south = southeast
    corners = [(west, north), (west, south), (east, south), (east, north)]
    bounding_box = shapely.geometry.Polygon(corners)
    snapshot = df.copy()
    is_inside = snapshot.geometry.apply(bounding_box.contains)
    return snapshot[is_inside]
netherlands = df[df.admin == 'Netherlands']
not speed_run and netherlands.plot(**plot_styles)
# Keep only the rows whose geometry lies inside the box with corners (-20, 60) and (20, 20)
netherlands_copy = apply_bounds(netherlands, (-20, 60), (20, 20))
not speed_run and netherlands_copy.plot(figsize=(8, 8), **plot_styles)
The administrative division in the Natural Earth dataset is outdated since the 2021 subdivision reform, see https://en.wikipedia.org/wiki/Administrative_divisions_of_Latvia.
# Start from the Natural Earth rows; they are kept if the download below fails
latvia_copy = df[df.admin == 'Latvia'].copy()
latvia_geojson_url = "https://raw.githubusercontent.com/eriks47/latvia/main/latvia.geojson"
try:
    latvia_gdf = gpd.read_file(latvia_geojson_url)
    # Replace the Natural Earth rows with the downloaded data.
    # NOTE(review): later cells read 'name' and 'iso_3166_2' from this frame —
    # confirm that the downloaded file provides both columns.
    latvia_copy = gpd.GeoDataFrame(
        latvia_gdf,
        geometry='geometry',
        crs=latvia_gdf.crs
    )
    print("GeoJSON file for Latvia downloaded and loaded successfully.")
except Exception as e:
    # Best effort: any failure is reported and the notebook carries on
    print(f"Unable to download or load the GeoJSON file for Latvia. Error: {str(e)}")
    print("Please download the file from the URL and try again.")
not speed_run and latvia_copy.plot(**plot_styles)
# --- United Kingdom ---
uk = df[df.admin == 'United Kingdom']
not speed_run and uk.plot(**plot_styles)
# Keep only the rows whose geometry lies inside the box with corners (-10, 60) and (20, 20)
uk_copy = apply_bounds(uk, (-10, 60), (20, 20))
not speed_run and uk_copy.plot(figsize=(8, 8), **plot_styles)
# --- Philippines ---
# This download is not wrapped in try/except, so a failure stops the notebook here
ph_url = "https://raw.githubusercontent.com/jdruii/phgeojson/main/philippines.geojson"
philippines_copy = gpd.read_file(ph_url)
# Expose the name and code under the column names used by the other maps
philippines_copy = philippines_copy.rename(columns={'NAME_1': 'name','ISO': 'iso_3166_2'})
not speed_run and philippines_copy.plot(**plot_styles)
# Region-level map built from the Natural Earth rows: dissolve by region code and name
philippines_regions = df[df.admin == 'Philippines'][['geometry','region_cod','region']]
philippines_regions = philippines_regions.dissolve(by=['region_cod', 'region']).reset_index()
philippines_regions = philippines_regions.rename(columns={'region': 'name', 'region_cod': 'iso_3166_2'})
# Rename two regions
philippines_regions['name'] = philippines_regions['name'].replace({
    'Dinagat Islands (Region XIII)': 'Caraga Administrative Region (Region XIII)',
    'Autonomous Region in Muslim Mindanao (ARMM)': 'Bangsamoro Autonomous Region in Muslim Mindanao (BARMM)'
})
not speed_run and philippines_regions.plot(figsize = (10, 7), **plot_styles)
vietnam = df[df.admin == 'Vietnam']
vietnam_copy = vietnam.copy()
# Name corrections, applied in this order: (name in the dataset, replacement)
vietnam_name_fixes = (
    ('Ðong Tháp', 'Đồng Tháp'),
    ('Son La', 'Sơn La'),
    ('Ha Tinh', 'Hà Tĩnh'),
    ('Quàng Nam', 'Quảng Nam'),
    ('Lai Chau', 'Lai Châu'),
    ('Hồ Chí Minh city', 'Thành phố Hồ Chí Minh'),
    ('Hau Giang', 'Hậu Giang'),
    ('Ha Noi', 'Hà Nội'),
    ('Can Tho', 'Cần Thơ'),
    ('Đông Nam Bộ', 'Đồng Nai'),
    ('Đông Bắc', 'Bắc Kạn'),
    ('Đồng Bằng Sông Hồng', 'Hưng Yên'),
)
for dataset_name, corrected_name in vietnam_name_fixes:
    replace_column('name', vietnam_copy, dataset_name, corrected_name)
# Print the resulting names, one per line, for a visual check
for i in vietnam_copy['name']:
    print(i)
# Gather up all the tweaked maps!
# Keys are the map names used for the output files; get_gdf() returns these
# frames instead of the raw dataset rows.
# NOTE(review): philippines_copy (built earlier in this notebook) is not
# registered here, so it is never exported — confirm whether that is intended.
alt_maps = {
    "finland": finland_copy,
    "china": china_copy,
    "usa": usa_copy,
    "france": france_copy,
    "france_regions": france_regions,
    "france_overseas": france_overseas,
    "turkey_regions": turkey_regions,
    "italy_regions": italy_regions,
    "philippines_regions": philippines_regions,
    "latvia": latvia_copy,
    "netherlands": netherlands_copy,
    "norway": norway_copy,
    "uk": uk_copy,
    "russia": russia_copy,
    "spain": spain_copy,
    "portugal": portugal_copy,
    "ukraine": ukraine_with_crimea,
    "india": india_copy,
    "vietnam": vietnam_copy
}
# Countries with a single region are useless as a choropleth, so they get filtered out
def get_num_subdivisions(country):
    """Return the number of distinct ``iso_3166_2`` values in the country's map.

    Prints a notice when that number is exactly one.
    """
    distinct_codes = get_gdf(country)['iso_3166_2'].unique()
    count = len(distinct_codes)
    if count == 1:
        print(country, "has only one subdivision - removing from countries array")
    return count
# Count the subdivisions once per country. Counting separately for each of the
# two lists below loaded every map twice and printed each notice twice.
subdivision_counts = [(country, get_num_subdivisions(country)) for country in countries]
# we add the unnecessary countries to a list here, for clearing out unneeded geojson files later
countries_to_purge = [country for country, count in subdivision_counts if count <= 1]
# now we purge those from our main "countries" list to continue processing
countries = [country for country, count in subdivision_counts if count > 1]
# Per-country overrides of the simplification factor, keyed by country name
simplify_factors = {
    "uk": 0.005,
}
# Columns written to the exported GeoJSON files
useful_columns = ["ISO", "NAME_1", "geometry"]
def get_simplify_factor_by_size(gdf):
    """Pick a simplification factor from the bounding-box area of ``gdf``.

    The area of the bounding box of all geometries is printed and mapped to
    0.03 (> 1000), 0.02 (> 300), 0.01 (> 100) or 0 (otherwise).
    """
    west, south, east, north = shapely.ops.unary_union(gdf["geometry"]).bounds
    size = (east - west) * (north - south)
    print("Size", round(size, 3), end="\t")
    # Thresholds are checked from largest to smallest; the first match wins.
    for threshold, factor in ((1000, 0.03), (300, 0.02), (100, 0.01)):
        if size > threshold:
            return factor
    return 0
def simplify_if_needed(country, gdf):
    """Simplify the geometry of ``gdf`` in place, unless the country is in the 1:50m dataset.

    The factor is the per-country override from ``simplify_factors`` if there
    is one, otherwise the size-based factor; a factor of 0 leaves the geometry
    untouched.
    """
    dataset_name = country_name_aliases.get(country, country)
    names_in_50m = df_50m["admin"].str.lower().unique()
    if dataset_name not in names_in_50m:
        factor = simplify_factors.get(country) or get_simplify_factor_by_size(gdf)
        if factor:
            gdf["geometry"] = gdf.simplify(factor)
def save_geojson(country):
    """Write the map of ``country`` to ``../src/countries/<name>.geojson``.

    Spaces in the country name become underscores in the file name. Only the
    columns listed in ``useful_columns`` are written.
    """
    gdf = get_gdf(country)
    print(country, end="\t")

    # For backward compatibility: expose code and name under their legacy column names
    for legacy_column, source_column in (("ISO", "iso_3166_2"), ("NAME_1", "name")):
        gdf[legacy_column] = gdf[source_column]

    simplify_if_needed(country, gdf)

    print(f'Saving geojson for {country}...')
    target_path = "../src/countries/{}.geojson".format(country.replace(' ', '_'))
    gdf[useful_columns].to_file(target_path, driver="GeoJSON")
# Delete previously generated files of countries that were filtered out
for country in countries_to_purge:
    filename_country = country.replace(' ', '_')
    filepath = f"../src/countries/{filename_country}.geojson"
    if os.path.exists(filepath):
        os.remove(filepath)
        print(f"Purged {filepath} since it has only one region")
# Export every remaining country
for country in countries:
    save_geojson(country)
# this overwrites some of the above... could be optimized
for country in alt_maps:
    save_geojson(country)
print("Done. ")
# Previews of everything that was exported, skipped when speed_run is set
not speed_run and plot_all_countries(countries)
not speed_run and plot_all_countries(alt_maps)
# Turn a country name into the identifier used in the generated TypeScript
def to_js_identifier(name):
    """Return ``name`` with every space and hyphen replaced by an underscore."""
    return name.translate(str.maketrans({' ': '_', '-': '_'}))
# License boilerplate
license_boilerplate = """/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
"""
# Every map that gets exported: the plain countries plus the tweaked/region maps
countries_combined = sorted({*countries, *alt_maps.keys()})
# Generate TypeScript import statements. The binding must be an identifier,
# while the path must match the file name save_geojson() writes (only spaces
# replaced); the two differ for names containing a hyphen.
imports = "\n".join(
    f"import {to_js_identifier(country)} from './countries/{country.replace(' ', '_')}.geojson';"
    for country in countries_combined
)
# Generate the export object
exports = "export const countries = {\n " + ",\n ".join([to_js_identifier(country) for country in countries_combined]) + ",\n};"
# Additional exports
additional_exports = """
export const countryOptions = Object.keys(countries).map(x => {
if (x === 'uk' || x === 'usa') {
return [x, x.toUpperCase()];
}
if (x === 'italy_regions') {
return [x, 'Italy (regions)'];
}
if (x === 'france_regions') {
return [x, 'France (regions)'];
}
if (x === 'france_overseas') {
return [x, 'France (with overseas)'];
}
if (x === 'turkey_regions') {
return [x, 'Turkey (regions)'];
}
return [
x,
x
.split('_')
.map(e => e[0].toUpperCase() + e.slice(1))
.join(' '),
];
});
export default countries;
"""
# Combine license, imports, exports, and additional exports
typescript_code = f"{license_boilerplate}\n{imports}\n\n{exports}\n{additional_exports}"
# Write to a file. The encoding is explicit because country names may contain
# non-ASCII characters and the platform default encoding is not always UTF-8.
with open("../src/countries.ts", "w", encoding="utf-8") as file:
    file.write(typescript_code)
print("TypeScript code written to src/countries.ts")
# DOCS JSON:
# Replace underscores with spaces and title-case each country name
# ("usa" and "uk" are upper-cased instead)
formatted_countries = [country.replace("_", " ") for country in countries_combined]
formatted_countries = [country.upper() if country in {"usa", "uk"} else country.title() for country in formatted_countries]
# Turn the " Regions" / " Overseas" suffixes into their parenthesised labels
formatted_countries = [country.replace(" Regions"," (regions)") for country in formatted_countries]
formatted_countries = [country.replace(" Overseas"," (with overseas)") for country in formatted_countries]
# Create a dictionary in the desired format
data = {"countries": formatted_countries}
# Convert the dictionary to a JSON string with proper formatting
json_data = json.dumps(data, indent=2) + "\n"
# Write to a file
with open("../../../../docs/data/countries.json", "w") as file:
    file.write(json_data)
print("JSON written to docs/data/countries.json")