Alexander Dunkel, Institute of Cartography, TU Dresden
Visualization of temporal patterns for submissions and comments from Nationalpark-Subreddits.
import os, sys
from pathlib import Path
import psycopg2
import geopandas as gp
import pandas as pd
import seaborn as sns
import calendar
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.patheffects as pe
import matplotlib.patches as mpatches
from typing import List, Tuple, Dict, Optional
from IPython.display import clear_output, display, HTML
from datetime import datetime
from pyproj import Transformer
module_path = str(Path.cwd().parents[0] / "py")
if module_path not in sys.path:
sys.path.append(module_path)
from modules.base import tools, hll
from modules.base.hll import union_hll, cardinality_hll
%load_ext autoreload
%autoreload 2
OUTPUT = Path.cwd().parents[0] / "out" # output directory for figures (etc.)
WORK_DIR = Path.cwd().parents[0] / "tmp" # Working directory
(OUTPUT / "figures").mkdir(exist_ok=True)
(OUTPUT / "svg").mkdir(exist_ok=True)
WORK_DIR.mkdir(exist_ok=True)
Plot styling
plt.style.use('default')
CRS_WGS = "epsg:4326" # WGS1984
CRS_PROJ = "esri:54009" # Mollweide
Define Transformer ahead of time with xy-order of coordinates
PROJ_TRANSFORMER = Transformer.from_crs(
CRS_WGS, CRS_PROJ, always_xy=True)
This is the habitat range calculated by iNaturalist observations. The area is currently very broad and includes many locations, where Milvus milvus will be only very seldom observable.
DATA_FOLDER = Path.cwd().parents[0] / "00_data" / "milvus"
range_kml = DATA_FOLDER / "range.kml"
from fiona.drvsupport import supported_drivers
supported_drivers['KML'] = 'rw'
df = gp.read_file(range_kml, driver='KML', ignore_geometry=False)
print(df.crs)
save to Geopackage, for archive purposes
df.to_file(
OUTPUT/ 'Milvusmilvus_range.gpkg', driver='GPKG', layer='Milvus milvus')
project to Mollweide
gdf_range = df.to_crs(CRS_PROJ)
ax = gdf_range.plot()
ax.set_axis_off()
Load observation data. This data was requested with the iNaturalist Export tool, based on a species search for Milvus. Milvus includes 2 species.
df = pd.read_csv(DATA_FOLDER / "observations-350501.csv")
df.head()
df.columns
gdf = gp.GeoDataFrame(
df, geometry=gp.points_from_xy(df.longitude, df.latitude), crs=CRS_WGS)
gdf = gdf.to_crs(CRS_PROJ)
Load world countries geometry
source_zip = "https://naciscdn.org/naturalearth/110m/cultural/"
filename = "ne_110m_admin_0_countries.zip"
shapes_name = "ne_110m_admin_0_countries.shp"
SHAPE_DIR = (OUTPUT / "shapes")
SHAPE_DIR.mkdir(exist_ok=True)
if not (SHAPE_DIR / shapes_name).exists():
tools.get_zip_extract(uri=source_zip, filename=filename, output_path=SHAPE_DIR)
else:
print("Already exists")
world = gp.read_file(
SHAPE_DIR / "ne_110m_admin_0_countries.shp")
world = world.to_crs(CRS_PROJ)
print(str(world.columns))
bbox_europe = -25, 35, 35, 59
# convert to Mollweide
minx, miny = PROJ_TRANSFORMER.transform(
bbox_europe[0], bbox_europe[1])
maxx, maxy = PROJ_TRANSFORMER.transform(
bbox_europe[2], bbox_europe[3])
range_color = "#810f7c"
observation_color = "red"
fig, ax = plt.subplots(1, 1, figsize=(6, 9))
gdf_range.plot(
ax=ax,
facecolor=range_color,
edgecolor=None,
alpha=0.4)
gdf.plot(
ax=ax,
markersize=.1,
facecolor=observation_color,
edgecolor=None,
alpha=0.9)
world.plot(
ax=ax, color='none', edgecolor='black', linewidth=0.3)
ax.set_xlim(minx, maxx)
ax.set_ylim(miny, maxy)
ax.axis('off')
range_patch = mpatches.Patch(
color=range_color,
label='Range', alpha=0.4)
obs_patch = mpatches.Patch(
color=observation_color,
label='Observations', alpha=0.9)
legend_entries = [range_patch, obs_patch]
legend_kwds = {
"bbox_to_anchor": (1.0, 1),
"loc":'upper left',
"fontsize":8, "frameon":False,
"title":"Milvus milvus", "title_fontsize":8,
"alignment":"left"}
ax.legend(
handles=legend_entries, **legend_kwds)
fig.tight_layout()
fig.show()
We just want to see distribution and major clusters. A more interactive approach is shown by João Paulo Figueira's Geographic Clustering with HDBSCAN
data = np.array(list(zip(df.longitude, df.latitude)))
import hdbscan
X = np.radians(data) # convert the list of lat/lon coordinates to radians
earth_radius_km = 6371
epsilon = 0.005 / earth_radius_km # calculate 5 meter epsilon threshold
clusterer = hdbscan.HDBSCAN(min_cluster_size=100, metric='haversine').fit(X)
color_palette = sns.color_palette('deep', len(clusterer.labels_))
cluster_colors = [color_palette[x] if x >= 0
else (0.5, 0.5, 0.5)
for x in clusterer.labels_]
cluster_member_colors = [sns.desaturate(x, p) for x, p in
zip(cluster_colors, clusterer.probabilities_)]
fig, ax = plt.subplots()
ax.scatter(*data.T, s=50, linewidth=0, c=cluster_member_colors, alpha=0.25)
ax.set_xlim(bbox_europe[0], bbox_europe[1])
ax.set_ylim(bbox_europe[2], bbox_europe[3])
ax.axis('off')
fig.show()
!jupyter nbconvert --to html_toc \
--output-dir=../resources/html/ ./04_inaturalist_chi.ipynb \
--template=../nbconvert.tpl \
--ExtractOutputPreprocessor.enabled=False >&- 2>&-