EO Data Example#

This notebook shows a practical example showing how to exploire Sentinel-2 Level 2A data in HEALPix.

The data we show here is created using notebook at jmdelouis/Sentinel2healgrid

  • Basic visualizations:

    • We will open HEALPix/Zarr Sentineo 2 test data.

    • Selecting areas/cells

    • Interactive plotting DGGS grids over a map

  • Simple analytics:

    • Example: time series over selected cells.

import healpy as hp
import xarray as xr
import xdggs
import fsspec
import sparse
dtotal=xr.open_zarr(
    'https://data-taos.ifremer.fr/EGU25_CFOSAT/Sentinel2_test.zarr',
).pipe(xdggs.decode)
dtotal
/var/folders/1c/q1jqr0h541n720bvcqb_0rsm001mmz/T/ipykernel_88203/1708922290.py:1: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates prior reform date (1582-10-15). To silence this warning specify 'use_cftime=True'.
  dtotal=xr.open_zarr(
<xarray.Dataset> Size: 2GB
Dimensions:    (time: 88, bands: 4, cells: 676289)
Coordinates:
  * bands      (bands) <U3 48B 'b02' 'b03' 'b04' 'b08'
  * cell_ids   (cells) int64 5MB dask.array<chunksize=(676289,), meta=np.ndarray>
  * time       (time) datetime64[ns] 704B 2019-07-29T10:30:31 ... 2022-08-12T...
Dimensions without coordinates: cells
Data variables:
    Sentinel2  (time, bands, cells) float64 2GB dask.array<chunksize=(1, 4, 676289), meta=np.ndarray>
Indexes:
    cell_ids  HealpixIndex(level=19, indexing_scheme=nested)
dtotal=dtotal.assign_coords(
    dtotal.dggs.cell_centers().coords
)
dtotal
<xarray.Dataset> Size: 2GB
Dimensions:    (time: 88, bands: 4, cells: 676289)
Coordinates:
  * bands      (bands) <U3 48B 'b02' 'b03' 'b04' 'b08'
  * cell_ids   (cells) int64 5MB dask.array<chunksize=(676289,), meta=np.ndarray>
  * time       (time) datetime64[ns] 704B 2019-07-29T10:30:31 ... 2022-08-12T...
    latitude   (cells) float64 5MB 52.25 52.25 52.25 52.25 ... 52.34 52.34 52.34
    longitude  (cells) float64 5MB 10.59 10.59 10.59 10.59 ... 10.51 10.51 10.51
Dimensions without coordinates: cells
Data variables:
    Sentinel2  (time, bands, cells) float64 2GB dask.array<chunksize=(1, 4, 676289), meta=np.ndarray>
Indexes:
    cell_ids  HealpixIndex(level=19, indexing_scheme=nested)
dtotal.isel(time=0).Sentinel2.compute().dggs.explore(
    center=0, cmap="viridis", alpha=0.4
)
latitude = 52.31410846097136
longitude = 10.577647748067031

point=dtotal.dggs.sel_latlon(
    longitude=longitude, latitude=latitude
).Sentinel2.isel(time=slice(0,30)).compute()
point
<xarray.DataArray 'Sentinel2' (time: 30, bands: 4, cells: 1)> Size: 960B
array([[[15440.],
        [13400.],
        [12112.],
        [11128.]],

       [[ 1362.],
        [ 1384.],
        [ 1226.],
        [ 3876.]],

       [[ 1080.],
        [ 1269.],
        [ 1138.],
        [ 5136.]],

       [[ 1680.],
        [ 1612.],
        [ 1472.],
        [ 3134.]],

...

       [[ 3562.],
        [ 3664.],
        [ 3836.],
        [ 5360.]],

       [[12536.],
        [11472.],
        [10928.],
        [10416.]],

       [[ 1636.],
        [ 1650.],
        [ 1425.],
        [ 3190.]],

       [[ 1289.],
        [ 1438.],
        [ 1263.],
        [ 4460.]]])
Coordinates:
  * bands      (bands) <U3 48B 'b02' 'b03' 'b04' 'b08'
  * cell_ids   (cells) int64 8B 198445532943
  * time       (time) datetime64[ns] 240B 2019-07-29T10:30:31 ... 2007-09-20T...
    latitude   (cells) float64 8B 52.31
    longitude  (cells) float64 8B 10.58
Dimensions without coordinates: cells
Indexes:
    cell_ids  HealpixIndex(level=19, indexing_scheme=nested)
point.dggs.explore(
    center=0, cmap="viridis", alpha=0.8
)
l_b08=point.sel(bands='b08')
l_b03=point.sel(bands='b03')

ndvi = ((l_b03 - l_b08) / (l_b03 + l_b08))

ndvi.compute().dggs.explore(alpha=0.8)
dtotal.Sentinel2.isel(time=0).compute().dggs.explore(alpha=0.8)
def create_arrow_table(polygons, arr, coords=None):
    from arro3.core import Array, ChunkedArray, Schema, Table

    if coords is None:
        coords = ["latitude", "longitude"]

    array = Array.from_arrow(polygons)
    name = arr.name or "data"
    arrow_arrays = {
        "geometry": array,
        "cell_ids": ChunkedArray([Array.from_numpy(arr.coords["cell_ids"])]),
        name: ChunkedArray([Array.from_numpy(arr.data)]),
    } | {
        coord: ChunkedArray([Array.from_numpy(arr.coords[coord].data)])
        for coord in coords
        if coord in arr.coords
    }

    fields = [array.field.with_name(name) for name, array in arrow_arrays.items()]
    schema = Schema(fields)

    return Table.from_arrays(list(arrow_arrays.values()), schema=schema)


def normalize(var, center=None):
    from matplotlib.colors import CenteredNorm, Normalize

    if center is None:
        vmin = var.min(skipna=True)
        vmax = var.max(skipna=True)
        normalizer = Normalize(vmin=vmin, vmax=vmax)
    else:
        halfrange = np.abs(var - center).max(skipna=True)
        normalizer = CenteredNorm(vcenter=center, halfrange=halfrange)

    return normalizer(var.data)


def exploire_layer(
    arr,
    cell_dim="cells",
    cmap="viridis",
    center=None,
    alpha=None,
):
    from lonboard import SolidPolygonLayer
    from lonboard.colormap import apply_continuous_cmap
    from matplotlib import colormaps

    if len(arr.dims) != 1 or cell_dim not in arr.dims:
        raise ValueError(
            f"exploration only works with a single dimension ('{cell_dim}')"
        )

    cell_ids = arr.dggs.coord.data
    grid_info = arr.dggs.grid_info

    polygons = grid_info.cell_boundaries(cell_ids, backend="geoarrow")

    normalized_data = normalize(arr.variable, center=center)

    colormap = colormaps[cmap]
    colors = apply_continuous_cmap(normalized_data, colormap, alpha=alpha)

    table = create_arrow_table(polygons, arr)
    layer = SolidPolygonLayer(table=table, filled=True, get_fill_color=colors)

    return layer
dtotal.Sentinel2
<xarray.DataArray 'Sentinel2' (time: 88, bands: 4, cells: 676289)> Size: 2GB
dask.array<open_dataset-Sentinel2, shape=(88, 4, 676289), dtype=float64, chunksize=(1, 4, 676289), chunktype=numpy.ndarray>
Coordinates:
  * bands      (bands) <U3 48B 'b02' 'b03' 'b04' 'b08'
  * cell_ids   (cells) int64 5MB dask.array<chunksize=(676289,), meta=np.ndarray>
  * time       (time) datetime64[ns] 704B 2019-07-29T10:30:31 ... 2022-08-12T...
    latitude   (cells) float64 5MB 52.25 52.25 52.25 52.25 ... 52.34 52.34 52.34
    longitude  (cells) float64 5MB 10.59 10.59 10.59 10.59 ... 10.51 10.51 10.51
Dimensions without coordinates: cells
Indexes:
    cell_ids  HealpixIndex(level=19, indexing_scheme=nested)

multibands visualisation R/G/B#

import lonboard
import numpy as np

itime=10
ratio=3E-4


itime=6
ratio=5E-4

#use of tanh to concentrate the scale variation for the lower values
ds1=np.tanh(ratio*dtotal.Sentinel2.sel(bands='b04').isel(time=itime)).compute()
ds2=np.tanh(ratio*dtotal.Sentinel2.sel(bands='b03').isel(time=itime)).compute()
ds3=np.tanh(ratio*dtotal.Sentinel2.sel(bands='b02').isel(time=itime)).compute()

lonboard.Map(
    [
        exploire_layer(
            ds1,
            alpha=0.33,
            cmap='Reds'
        ),
        exploire_layer(
            ds2, 
            alpha=0.33,
            cmap='Greens'),
        exploire_layer(
            ds3, 
            alpha=0.33,
            cmap='Blues'),
    ]
)