iNaturalist Data Exploration: Milvus milvus

Alexander Dunkel, Institute of Cartography, TU Dresden

•••
Out[1]:

Last updated: Aug-08-2023, Carto-Lab Docker Version 0.14.0

Visualization of temporal patterns for submissions and comments from Nationalpark-Subreddits.

Preparations

In [1]:
import os, sys
from pathlib import Path
import psycopg2
import geopandas as gp
import pandas as pd
import seaborn as sns
import calendar
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.patheffects as pe
import matplotlib.patches as mpatches
from typing import List, Tuple, Dict, Optional
from IPython.display import clear_output, display, HTML
from datetime import datetime
from pyproj import Transformer
In [2]:
module_path = str(Path.cwd().parents[0] / "py")
if module_path not in sys.path:
    sys.path.append(module_path)
from modules.base import tools, hll
from modules.base.hll import union_hll, cardinality_hll
In [3]:
%load_ext autoreload
%autoreload 2
In [4]:
OUTPUT = Path.cwd().parents[0] / "out"       # output directory for figures (etc.)
WORK_DIR = Path.cwd().parents[0] / "tmp"     # Working directory
In [5]:
(OUTPUT / "figures").mkdir(exist_ok=True)
(OUTPUT / "svg").mkdir(exist_ok=True)
WORK_DIR.mkdir(exist_ok=True)

Plot styling

In [6]:
plt.style.use('default')
In [7]:
CRS_WGS = "epsg:4326" # WGS1984
CRS_PROJ = "esri:54009" # Mollweide

Define Transformer ahead of time with xy-order of coordinates

In [8]:
PROJ_TRANSFORMER = Transformer.from_crs(
    CRS_WGS, CRS_PROJ, always_xy=True)

Load Milvus range

This is the habitat range calculated by iNaturalist observations. The area is currently very broad and includes many locations, where Milvus milvus will be only very seldom observable.

In [9]:
DATA_FOLDER = Path.cwd().parents[0] / "00_data" / "milvus"
range_kml = DATA_FOLDER / "range.kml"
In [10]:
from fiona.drvsupport import supported_drivers
supported_drivers['KML'] = 'rw'

df = gp.read_file(range_kml, driver='KML', ignore_geometry=False)
print(df.crs)
epsg:4326

save to Geopackage, for archive purposes

In [11]:
df.to_file(
    OUTPUT/ 'Milvusmilvus_range.gpkg', driver='GPKG', layer='Milvus milvus')  

project to Mollweide

In [12]:
gdf_range = df.to_crs(CRS_PROJ)
In [13]:
ax = gdf_range.plot()
ax.set_axis_off()

Load Milvus milvus observations

Load observation data. This data was requested with the iNaturalist Export tool, based on a species search for Milvus. Milvus includes 2 species.

In [14]:
df = pd.read_csv(DATA_FOLDER / "observations-350501.csv")
In [15]:
df.head()
Out[15]:
id observed_on_string observed_on time_observed_at time_zone user_id user_login user_name created_at updated_at ... geoprivacy taxon_geoprivacy coordinates_obscured positioning_method positioning_device species_guess scientific_name common_name iconic_taxon_name taxon_id
0 7793 May 22, 2010 13:59 2010-05-22 2010-05-22 12:59:00 UTC London 446 kevinandseri NaN 2010-07-10 09:15:17 UTC 2016-06-22 18:30:14 UTC ... NaN open False NaN NaN Red Kite Milvus milvus Rotmilan Aves 5267
1 9495 2011-01-01 11:29 2011-01-01 2011-01-01 11:29:00 UTC London 351 zabdiel John Proctor 2011-01-01 11:31:50 UTC 2016-06-22 18:32:46 UTC ... NaN open False NaN NaN Red Kite Milvus milvus Rotmilan Aves 5267
2 9496 2011-01-01 11:42 2011-01-01 2011-01-01 11:42:00 UTC London 351 zabdiel John Proctor 2011-01-01 11:43:09 UTC 2016-06-22 18:32:46 UTC ... NaN open False NaN NaN red kite Milvus milvus Rotmilan Aves 5267
3 14694 2011-04-17 2011-04-17 NaN London 654 spookypeanut NaN 2011-04-17 18:18:12 UTC 2016-06-22 18:44:11 UTC ... NaN open False NaN NaN Red Kite Milvus milvus Rotmilan Aves 5267
4 50134 2011-10-05 2011-10-05 NaN Berlin 4130 michael Michael 2012-02-01 19:59:12 UTC 2016-06-22 19:39:03 UTC ... NaN open False NaN NaN Milvus milvus Milvus milvus Rotmilan Aves 5267

5 rows × 39 columns

In [16]:
df.columns
Out[16]:
Index(['id', 'observed_on_string', 'observed_on', 'time_observed_at',
       'time_zone', 'user_id', 'user_login', 'user_name', 'created_at',
       'updated_at', 'quality_grade', 'license', 'url', 'image_url',
       'sound_url', 'tag_list', 'description', 'num_identification_agreements',
       'num_identification_disagreements', 'captive_cultivated',
       'oauth_application_id', 'place_guess', 'latitude', 'longitude',
       'positional_accuracy', 'private_place_guess', 'private_latitude',
       'private_longitude', 'public_positional_accuracy', 'geoprivacy',
       'taxon_geoprivacy', 'coordinates_obscured', 'positioning_method',
       'positioning_device', 'species_guess', 'scientific_name', 'common_name',
       'iconic_taxon_name', 'taxon_id'],
      dtype='object')
In [17]:
gdf = gp.GeoDataFrame(
    df, geometry=gp.points_from_xy(df.longitude, df.latitude), crs=CRS_WGS)
gdf = gdf.to_crs(CRS_PROJ)

Load world countries geometry

In [18]:
source_zip = "https://naciscdn.org/naturalearth/110m/cultural/"
filename = "ne_110m_admin_0_countries.zip"
shapes_name = "ne_110m_admin_0_countries.shp"
In [19]:
SHAPE_DIR = (OUTPUT / "shapes")
SHAPE_DIR.mkdir(exist_ok=True)

if not (SHAPE_DIR / shapes_name).exists():
    tools.get_zip_extract(uri=source_zip, filename=filename, output_path=SHAPE_DIR)
else:
    print("Already exists")
Already exists
In [20]:
world = gp.read_file(
    SHAPE_DIR / "ne_110m_admin_0_countries.shp")
world = world.to_crs(CRS_PROJ)
In [21]:
print(str(world.columns))
Index(['featurecla', 'scalerank', 'LABELRANK', 'SOVEREIGNT', 'SOV_A3',
       'ADM0_DIF', 'LEVEL', 'TYPE', 'TLC', 'ADMIN',
       ...
       'FCLASS_TR', 'FCLASS_ID', 'FCLASS_PL', 'FCLASS_GR', 'FCLASS_IT',
       'FCLASS_NL', 'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'geometry'],
      dtype='object', length=169)
In [22]:
bbox_europe = -25, 35, 35, 59
# convert to Mollweide
minx, miny = PROJ_TRANSFORMER.transform(
    bbox_europe[0], bbox_europe[1])
maxx, maxy = PROJ_TRANSFORMER.transform(
    bbox_europe[2], bbox_europe[3])
In [23]:
range_color = "#810f7c"
observation_color = "red"
In [31]:
fig, ax = plt.subplots(1, 1, figsize=(6, 9))

gdf_range.plot(
    ax=ax,
    facecolor=range_color,
    edgecolor=None,
    alpha=0.4)
gdf.plot(
    ax=ax,
    markersize=.1,
    facecolor=observation_color,
    edgecolor=None,
    alpha=0.9)
world.plot(
    ax=ax, color='none', edgecolor='black', linewidth=0.3)
ax.set_xlim(minx, maxx)
ax.set_ylim(miny, maxy)
ax.axis('off')
range_patch = mpatches.Patch(
    color=range_color,
    label='Range', alpha=0.4)
obs_patch = mpatches.Patch(
    color=observation_color,
    label='Observations', alpha=0.9)
legend_entries = [range_patch, obs_patch]
legend_kwds = {
    "bbox_to_anchor": (1.0, 1),
    "loc":'upper left',
    "fontsize":8, "frameon":False,
    "title":"Milvus milvus", "title_fontsize":8,
    "alignment":"left"}
ax.legend(
    handles=legend_entries, **legend_kwds)
fig.tight_layout()
fig.show()

Find clusters (HDBSCAN)

We just want to see distribution and major clusters. A more interactive approach is shown by João Paulo Figueira's Geographic Clustering with HDBSCAN

In [190]:
data = np.array(list(zip(df.longitude, df.latitude)))
In [191]:
import hdbscan
X = np.radians(data) # convert the list of lat/lon coordinates to radians
earth_radius_km = 6371
epsilon = 0.005 / earth_radius_km # calculate 5 meter epsilon threshold

clusterer = hdbscan.HDBSCAN(min_cluster_size=100, metric='haversine').fit(X)
In [193]:
color_palette = sns.color_palette('deep', len(clusterer.labels_))
cluster_colors = [color_palette[x] if x >= 0
                  else (0.5, 0.5, 0.5)
                  for x in clusterer.labels_]
cluster_member_colors = [sns.desaturate(x, p) for x, p in
                         zip(cluster_colors, clusterer.probabilities_)]
fig, ax = plt.subplots()
ax.scatter(*data.T, s=50, linewidth=0, c=cluster_member_colors, alpha=0.25)
ax.set_xlim(bbox_europe[0], bbox_europe[1])
ax.set_ylim(bbox_europe[2], bbox_europe[3])
ax.axis('off')

fig.show()

Load Milvus milvus observations

In [ ]:
 

Create notebook HTML

In [320]:
!jupyter nbconvert --to html_toc \
    --output-dir=../resources/html/ ./04_inaturalist_chi.ipynb \
    --template=../nbconvert.tpl \
    --ExtractOutputPreprocessor.enabled=False >&- 2>&-
In [ ]: