# coding: utf8
#
# Copyright (c) 2025 Centre National d'Etudes Spatiales (CNES).
#
# This file is part of GRIDR
# (see https://github.com/CNES/gridr).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Chain module for grid-based raster resampling with I/O management.
This module provides the high-level resampling pipeline that wraps the
core ``array_grid_resampling`` function with rasterio-based I/O,
tiled/strip processing, memory management via shared memory arrays, and
unified mask strategy resolution.
Main entry point
----------------
``basic_grid_resampling_chain``
Full pipeline from rasterio datasets to output dataset. Handles
strip/tile decomposition, shared memory allocation, and parallel
processing.
Mask strategy
-------------
The mask preparation follows the same decision logic as the core module
via ``resolve_mask_strategy`` (imported from ``grid_resampling``),
ensuring consistent behaviour between the standalone and chain code
paths for identical inputs. The chain-specific execution is handled
by ``apply_mask_strategy_chain``, which operates on a pre-allocated
buffer sized to the full marged window rather than allocating a new
padded buffer.
The ``trust_padding`` parameter controls whether extrapolated data in
the padded zone is considered valid. It must be explicitly set when
``boundary_condition`` is not ``None``.
Memory model
------------
Source data and output buffers are managed via ``SharedArray``
to support multi-process parallelism. Buffers are registered in a
list and released in a ``finally`` block to prevent memory leaks even
on error.
"""
import logging
from functools import partial
from typing import List, Optional, Tuple, Union
import numpy as np
import rasterio
from gridr.core.grid.grid_commons import grid_full_resolution_shape, grid_resolution_window_safe
from gridr.core.grid.grid_mask import Validity, build_mask
from gridr.core.grid.grid_rasterize import GeometryType, GridRasterizeAlg
from gridr.core.grid.grid_resampling import (
ResamplingMaskStrategy,
array_grid_resampling,
calculate_source_extent,
check_mask_strategy,
get_array_padded_shape,
resolve_mask_strategy,
)
from gridr.core.grid.grid_utils import array_shift_grid_coordinates
from gridr.core.interp.bspline_prefiltering import (
array_bspline_prefiltering,
array_bspline_prefiltering_mask_safe_win,
)
from gridr.core.interp.interpolator import (
Interpolator,
InterpolatorIdentifier,
get_interpolator,
is_bspline,
)
from gridr.core.utils import chunks
from gridr.core.utils.array_pad import pad_inplace
from gridr.core.utils.array_utils import ArrayProfile, array_convert, array_replace
from gridr.core.utils.array_window import (
as_rio_window,
complementary_window_indices,
window_check,
window_from_chunk,
window_from_indices,
window_indices,
window_shape,
window_shift,
)
from gridr.io.common import GridRIOMode
#from gridr.scaling.shmutils import SharedMemoryArray, create_and_register_sma
from gridr.scaling.shared_array import (
SharedArray,
create_and_register,
get_backend,
)
DEFAULT_IO_STRIP_SIZE = 1000
DEFAULT_TILE_SHAPE = (1000, 1000)
DEFAULT_NCPU = 1
READ_TILE_MIN_SIZE = (1000, 1000)
GEOMETRY_RASTERIZE_KWARGS = {"alg": GridRasterizeAlg.RASTERIO_RASTERIZE}
"""
Performs an additional validation of the grid source boundaries to ensure topological consistency.
This check computes the source boundaries from all valid grid data within the current computed
region, verifying that the source boundaries extracted from grid metrics align with the hull border.
When using grid metrics only, we assumes that points inside the source hull correspond to points
within the target hull, maintaining topological integrity. If this assumption is violated, the read
window may be insufficient, potentially causing a Rust panic when attempting to access out-of-bounds
indices.
This safety check helps prevent such runtime errors by proactively extending boundary conditions if
required.
"""
SAFECHECK_SOURCE_BOUNDARIES = True
[docs]
def apply_mask_strategy_chain(
array_in_mask: Optional[np.ndarray],
array_in_indices: Tuple[slice, slice],
pad: np.ndarray,
array_in_shape: Tuple[int, ...],
strategy: ResamplingMaskStrategy,
) -> Optional[np.ndarray]:
"""Build the final mask buffer for the chain IO path.
Chain-specific counterpart of ``apply_mask_strategy`` from the core
module. Both functions share the same strategy resolution
(``resolve_mask_strategy``) but differ in execution:
- The **core** path allocates a new padded buffer via
``source_extent_pad``.
- The **chain** path reuses a pre-allocated buffer sized to the full
marged window (``array_src_win_marged``), where only the data zone
(``array_in_indices``) has been written by the raster read.
This function reproduces the two-step logic of ``source_extent_pad``
on the existing buffer:
1. Fill the padded zone with ``strategy.pad_fill`` (skipped when
the buffer is already initialised to ``Validity.INVALID`` and
``strategy.pad_fill == Validity.INVALID``).
2. Overwrite the padded zone with extrapolated values via
``pad_inplace`` if ``strategy.boundary_condition`` is set.
Entry hypotheses
----------------
- ``array_in_mask``, when not ``None``, **must** have been allocated
with ``np.full(..., Validity.INVALID)`` so that the padded zone
is already set to ``Validity.INVALID``. Step 1 then only needs
to run when ``strategy.pad_fill == Validity.VALID`` (upward
correction), avoiding a redundant write in the common case.
- The data zone ``array_in_mask[array_in_indices]`` has already been
written with the raster read data before this function is called.
- ``array_in_indices`` identifies exactly the non-padded data zone,
i.e. ``window_from_indices(array_in_indices)`` is the complement
of the padded border.
- ``array_in_shape`` is the shape of the original (non-padded) source
window. It is only used when ``strategy.needs_mask_alloc`` is
``True`` (no raster mask dataset, no geometry mask).
- ``pad`` is the same padding array used to compute
``array_src_win_marged`` and ``array_in_indices``.
Parameters
----------
array_in_mask : np.ndarray or None
Pre-allocated mask buffer of shape ``(nrow_marged, ncol_marged)``,
dtype ``uint8``, C-contiguous, initialised with
``np.full(..., Validity.INVALID)``. The data zone at
``array_in_indices`` has already been written.
``None`` when no raster mask dataset and no geometry mask were
provided (``strategy.needs_mask_alloc`` must be ``True`` in
that case).
array_in_indices : tuple of slice
Index tuple ``(row_slice, col_slice)`` identifying the data zone
within ``array_in_mask``, i.e. the zone read from the raster
excluding padding.
pad : np.ndarray, shape (2, 2)
Padding amounts ``[[top, bottom], [left, right]]``.
array_in_shape : tuple of int
Shape of the original (non-padded) source window as
``(nrow, ncol)``. Required only when
``strategy.needs_mask_alloc`` is ``True`` to compute the padded
mask shape via ``get_array_padded_shape``.
strategy : ResamplingMaskStrategy
Strategy produced by ``resolve_mask_strategy``. Validated by
``check_mask_strategy`` before execution.
Returns
-------
np.ndarray or None
The final mask buffer (uint8, 2D, C-contiguous) ready to pass
to ``array_grid_resampling``, or ``None`` when no mask is needed
(``strategy.mask_kind == 'none'``).
Raises
------
ValueError
If ``strategy`` is internally inconsistent (via
``check_mask_strategy``), or if ``array_in_mask`` is ``None``
when the strategy requires an existing mask buffer.
See Also
--------
resolve_mask_strategy : Builds the strategy.
check_mask_strategy : Validates the strategy.
apply_mask_strategy : Core-path equivalent using ``source_extent_pad``.
"""
pad = np.asarray(pad)
has_pad = np.any(pad != 0)
check_mask_strategy(pad, strategy)
if strategy.mask_kind == "none":
return None
if strategy.needs_mask_alloc:
# No mask buffer exists yet (no raster mask, no geometry).
# Allocate a structural mask from scratch.
mask_profile = ArrayProfile(
shape=array_in_shape,
ndim=len(array_in_shape),
dtype=np.uint8,
)
mask_profile.make_2d()
mask_padded_shape, _ = get_array_padded_shape(mask_profile, pad)
final_mask = np.full(
mask_padded_shape,
strategy.pad_fill,
dtype=np.uint8,
order="C",
)
# Mark the safe region as valid when the rest is invalid.
if strategy.pad_fill != Validity.VALID:
final_mask[window_indices(strategy.safe_region)] = Validity.VALID
return final_mask
# binary or safe_region with an existing mask buffer.
if array_in_mask is None:
raise ValueError(
f"'strategy.mask_kind'={strategy.mask_kind!r} requires a mask "
"but array_in_mask is None"
)
final_mask = array_in_mask
if has_pad:
# Reproduce source_extent_pad two-step logic on the existing buffer.
# Step 1: fill the padded zone with strategy.pad_fill.
# The buffer is already initialised to Validity.INVALID (np.full) (see
# basic_grid_resampling_array) for the padded zone (the interior
# contains the read/rasterized mask value),
# so this step only runs when pad_fill == Validity.VALID to avoid
# a redundant write in the common case.
if strategy.pad_fill == Validity.VALID:
pad_area = complementary_window_indices(
win=window_from_indices(array_in_indices, final_mask.shape, None),
shape=final_mask.shape,
axes=None,
)
for pad_indices in pad_area:
final_mask[pad_indices] = Validity.VALID
# Step 2: overwrite padded zone with extrapolated values if a
# boundary condition is available.
if strategy.boundary_condition is not None:
pad_inplace(
array=final_mask,
src_win=array_in_indices,
pad_width=pad,
mode=strategy.boundary_condition,
strict_size=True,
)
return final_mask
[docs]
def basic_grid_resampling_array(
interp: Interpolator,
grid_arr: np.ndarray,
grid_arr_shape: Tuple[int, int],
grid_resolution: Tuple[int, int],
grid_nodata: Optional[Union[int, float]],
grid_mask_arr: np.ndarray,
grid_mask_in_unmasked_value: np.uint8,
array_src_ds: rasterio.io.DatasetReader,
array_src_bands: Union[int, List[int]],
array_src_mask_ds: Optional[rasterio.io.DatasetReader],
array_src_mask_band: Optional[int],
array_src_mask_validity_pair: Optional[Tuple[int, int]],
array_src_geometry_origin: Optional[Tuple[float, float]],
array_src_geometry_pair: Optional[Tuple[Optional[GeometryType], Optional[GeometryType]]],
oversampled_grid_win: np.ndarray,
margin: np.ndarray,
sma_out_buffer: SharedArray,
out_win: np.ndarray,
nodata_out: Optional[Union[int, float]],
boundary_condition: Optional[str],
trust_padding: bool,
sma_out_mask_buffer: Optional[SharedArray],
logger_msg_prefix: str,
logger: logging.Logger,
):
"""Resamples source data into a target oversampled window on a grid.
This method processes a 3D raster grid (`grid_arr`) that includes row and
column coordinates. The `grid_arr` may represent a sub-region of a larger,
non-oversampled grid. Additionally, since `grid_arr` might correspond to a
uniquely allocated buffer used for multiple sub-regions, we cannot rely on
its `shape` attribute to determine the dimensions of the sub-region.
Therefore, the `grid_arr_shape` argument is required.
Parameters
----------
interp : Interpolator
The interpolator.
grid_arr : numpy.ndarray
A 3D array of shape ``(2, rows, cols)``, containing the raster grid's
row and column coordinates. It may represent a sub-region of a larger
(non-oversampled) grid. The local origin ``(0, 0)`` corresponds to
``grid_arr[:, 0, 0]``.
grid_arr_shape : tuple of int
Shape of the active sub-region in `grid_arr`, given as ``(rows, cols)``.
Required because `grid_arr` may be a larger buffer reused across
tiles or subregions.
grid_resolution : tuple of int
Resolution of the coarse grid, typically in pixels or map units per
pixel (e.g., ``(10, 10)``).
grid_nodata : scalar or None
The NoData value associated with `grid_arr`, marking invalid or
missing data points.
grid_mask_arr : numpy.ndarray, optional
Optional 2D `uint8` or `int8` mask array aligned with `grid_arr`
(shape: ``(rows, cols)``).
Indicates valid (unmasked) and invalid (masked) data. Defaults to None.
grid_mask_in_unmasked_value : numpy.uint8
Value in `grid_mask_arr` that represents a valid/unmasked data point.
array_src_ds : rasterio.io.DatasetReader
The source dataset (e.g., a GDAL or Rasterio object) from which
raster data will be read and resampled.
array_src_bands : int or list of int
List of band indices to read from `array_src_ds`. If a single band is
provided, it can be an integer.
array_src_mask_ds : rasterio.io.DatasetReader or None, optional
Optional dataset representing the mask associated with `array_src_ds`.
Defaults to None.
array_src_mask_band : int or None, optional
Band index to read from `array_src_mask_ds` for the source mask.
Defaults to None.
array_src_mask_validity_pair : tuple of int, optional
A tuple containing two integer :
- The first integer corresponds to the value to consider as valid in
the mask array.
- The second integer corresponds to the value to consider as invalid
in the mask array.
If the tuple differs from (`Validity.VALID`, `Validity.INVALID`) a
replace operation will be performed in order to make the mask compliant
with the core resampling method.
array_src_geometry_origin : tuple of float or None, optional
This optional parameter specifies the origin convention for the
`array_src_geometry_pair` definition. GridR uses a ``(0, 0)`` image
coordinate system to address the first pixel of the `array_src`
raster. This parameter allows you to align the `array_src_geometry_pair`
definition with GridR's convention, ensuring proper spatial
referencing. Please note, its internal usage is solely for modifying
`array_src_geometry_pair`. Defaults to None.
array_src_geometry_pair : tuple of (GeometryType or None), optional
A tuple containing two optional `GeometryType` elements:
- The first element: Represents the **valid** geometries.
- The second element: Represents the **invalid** geometries.
If provided, a rasterization of those geometries is
performed locally on the current `array_src` raster window. This
generated mask is then merged with any additional raster mask supplied
via the `array_src_mask_ds` dataset. The rasterization itself is
delegated to the `build_mask` gridr's core method. Defaults to None.
oversampled_grid_win : numpy.ndarray
Target window for resampling, defined in full-resolution coordinates,
relative to the local origin of `grid_arr`.
margin : numpy.ndarray
Pixel margin to apply when computing the minimal read window from
`array_src_ds`, ensuring context for resampling (e.g., for kernels).
Format: ``[[top_margin, bottom_margin], [left_margin, right_margin]]``.
sma_out_buffer : SharedArray or numpy.ndarray
Output array (or shared memory buffer) where resampled values will
be written.
out_win : numpy.ndarray
Window within `sma_out_buffer` specifying where to write the output
data. Format: ``[[row_start, row_end], [col_start, col_end]]``.
nodata_out : scalar or None
NoData value to fill the output if the grid metrics are invalid or if
no valid data points can be found.
boundary_condition : str or None, default None
Optional padding mode when required data for interpolation lies
outside the source dataset domain. Available values are a subset
of the ``numpy.pad`` method modes: ``'edge'``, ``'reflect'``,
``'symmetric'``, or ``'wrap'``.
Uses a GridR-specific in-place padding implementation instead of
``numpy.pad`` to avoid unnecessary memory allocation.
For the mask, the padded zone fill value is determined by
``trust_padding``: ``Validity.VALID`` if trusted, ``Validity.INVALID``
otherwise. If a boundary condition is set and ``trust_padding`` is
``True``, the mask is padded using the same boundary condition.
trust_padding : bool, default True
Controls how the padded zone is marked in the mask when padding
is applied. If ``True`` and a ``boundary_condition`` is set,
the padded mask zone is considered valid (``Validity.VALID``).
If ``False``, the padded zone is always marked as
``Validity.INVALID`` regardless of the boundary condition.
See ``resolve_mask_strategy`` for the full decision tree.
sma_out_mask_buffer : SharedArray or numpy.ndarray or None, optional
Optional output array (or shared memory buffer) where output mask
will be written. Defaults to None.
logger_msg_prefix : str
Prefix to prepend to all logger messages, useful for debugging and
tracing within logs.
logger : logging.Logger
Logger instance used for debug and informational messages.
Notes
-----
The goal is to generate data within a specific target window
(`oversampled_grid_win`), which is defined in a full-resolution geometry.
The coordinates of this target window are relative to the local origin
of `grid_arr`.
To optimize the loading of only the necessary extent from the source image
(`array_src_ds`), this method calls `array_compute_resampling_grid_geometries`
on the minimal low-resolution grid window that completely contains the
target full-resolution window. The provided `margin` parameter is also
incorporated into this calculation to ensure sufficient data coverage.
The method writes the resampled output to a shared memory array
(`sma_out_buffer`), using `out_win` to specify the target writing window
within this buffer.
Optional masks for both the grid and the source array may be passed.
Masks can be passed as either `int8` or `uint8` arrays, but the values must
be positive and within the uint8 range of [0-255]. If you provide an `int8`
mask, the method will internally shadow it with a `uint8` view.
If the grid metrics are not valid (i.e., there was not sufficient valid data
to determine the grid and source boundaries), the method fills the windowed
output with the `nodata_out` value.
"""
def DEBUG(msg):
logger.debug(f"{logger_msg_prefix} - {msg}")
def WARNING(msg):
logger.warning(f"{logger_msg_prefix} - {msg}")
# By default consider everything will be nodata as we dont know yet if
# the inputs are valid in the targeted area.
full_nodata = True
array_src_profile_2d = ArrayProfile(
shape=(array_src_ds.height, array_src_ds.width),
ndim=2,
dtype=np.dtype(array_src_ds.dtypes[array_src_bands[0] - 1]),
)
# Check grid_mask_arr - no change done here but the grid_mask_arr
# may be shadowed with an uint8 view if passed as int8
if grid_mask_arr is not None:
# Check array_src_mask dtype is an 8 bit integer
# We will ensure it is passed as uint8 later
if grid_mask_arr.dtype not in (np.int8, np.uint8):
raise TypeError("The grid mask array must be an 8 bit integer " "raster.")
# Check validity value in range of uint8
if grid_mask_in_unmasked_value < 0 or grid_mask_in_unmasked_value > 255:
raise ValueError(
"The `grid_mask_in_unmasked_value` must be in the "
"[0-255] range."
f"The current value is {grid_mask_in_unmasked_value}"
)
if grid_mask_arr.dtype == np.int8:
grid_mask_arr = grid_mask_arr.view(np.uint8)
# Compute source boundaries from all valid coordinates
(
array_src_win_read,
array_src_win_marged,
pad,
grid_metrics,
) = calculate_source_extent(
interp=interp,
array_in=array_src_profile_2d,
grid_row=grid_arr[0],
grid_col=grid_arr[1],
grid_resolution=grid_resolution,
grid_nodata=None, # TODO : Not yet supported
grid_mask=grid_mask_arr,
grid_mask_valid_value=grid_mask_in_unmasked_value,
win=oversampled_grid_win,
safecheck_src_boundaries=SAFECHECK_SOURCE_BOUNDARIES,
logger_msg_prefix=logger_msg_prefix,
logger=logger,
)
if array_src_win_read is not None:
# if array_src_win_read is not None then pad is not None too
# Read data is available
full_nodata = False
array_src_win_read_shape = window_shape(array_src_win_read)
# memory required - for array_src
DEBUG("Computing required memory for array source read")
array_src_win_memory = 0
for band in array_src_bands:
array_src_win_band_memory = np.dtype("float64").itemsize * np.prod(
np.diff(array_src_win_read, axis=1)
)
array_src_win_memory += array_src_win_band_memory
DEBUG(
f"memory needed for array_src_win + margin band {band}: "
f"{array_src_win_band_memory} bytes"
)
# memory required - for array_src_mask
DEBUG("Computing required memory for array source mask read")
array_src_mask_win_memory = 0
array_src_mask_dtype = np.uint8
array_src_mask_read_dtype = np.uint8
if array_src_mask_ds is not None:
array_src_mask_read_dtype = np.dtype(array_src_mask_ds.dtypes[array_src_mask_band - 1])
# Check array_src_mask dtype is an 8 bit integer
# We will ensure it is passed as uint8 later
if array_src_mask_read_dtype not in (np.int8, np.uint8):
raise TypeError("The mask array must be an 8 bit integer " "raster.")
array_src_mask_win_memory = array_src_mask_read_dtype.itemsize * np.prod(
np.diff(array_src_win_read, axis=1)
)
DEBUG(
f"Memory required for array_src_mask_win + margin : "
f"{array_src_mask_win_memory} bytes"
)
array_src_win_memory += array_src_mask_win_memory
else:
DEBUG("No available mask for array source\n")
DEBUG("Memory required for array_src_mask + margin : " f"{array_src_mask_win_memory} bytes")
DEBUG(
f"Total memory required for array_src_win and mask : " f"{array_src_win_memory} bytes"
)
# The `tile_read_buffer_shape` corresponds to the shape of the buffer
# that will hold the read data. It can be larger than the actual read
# window set in the `tile_src_sin_read` variable in order to also hold
# "virtual" margins.
array_src_read_buffer_shape = window_shape(array_src_win_marged)
array_src_read_buffer_shape = np.insert(
array_src_read_buffer_shape, 0, len(array_src_bands)
)
DEBUG(f"tile read buffer shape : {array_src_read_buffer_shape}")
# TODO : do not force float 64 here => requires bound core function to handle other
# types
# cstrip_read_buffer = np.zeros(cstrip_read_buffer_shape, dtype=array_src_profile.dtype)
# Note : the read buffer is initialize with zeros. If no boundary
# condition, the marged border strips will remain at zero.
array_src_read_buffer = np.zeros(array_src_read_buffer_shape, dtype=np.float64, order="C")
# Read the source array.
# - Due to "virtual" margins we have to compute the correct indices in
# the tile_read_buffer that will hold the read array for each band.
DEBUG("Reading tiles...")
def get_read_buffer_indices(b, p, s):
return (
b,
slice(p[0][0], p[0][0] + s[0], None),
slice(p[1][0], p[1][0] + s[1], None),
)
def get_mask_read_buffer_indices(p, s):
return (
slice(p[0][0], p[0][0] + s[0], None),
slice(p[1][0], p[1][0] + s[1], None),
)
for band_read, band_in in enumerate(array_src_bands):
indices = get_read_buffer_indices(band_read, pad, array_src_win_read_shape)
# TODO : the bellow commented line should be decommented when f64 is not forced.
# array_src_ds.read(band+1, window = as_rio_window(ctile_src_win_read),
# out=ctile_read_buffer[indices])
# Save read data in the buffer at `indices`.
DEBUG(
f"Reading source window for source band {band_in} "
f"- source window : {array_src_win_read} "
f"- target indices : {indices} ..."
)
array_src_read_buffer[indices] = array_src_ds.read(
band_in, window=as_rio_window(array_src_win_read)
).astype(np.float64)
DEBUG(
f"Reading source window for source band {band_in} "
f"- source window : {array_src_win_read} "
f"- target indices : {indices} [DONE]"
)
# Boundary conditions
if boundary_condition is not None and np.any(pad != 0):
# array_src_read_buffer is 3d
# we must convert pad to 3d here
pad_inplace(
array=array_src_read_buffer[band_read],
src_win=indices[1:],
pad_width=pad,
mode=boundary_condition,
strict_size=True,
)
array_src_mask_read_buffer = None
array_src_mask_read_buffer_shape = window_shape(array_src_win_marged)
indices = get_mask_read_buffer_indices(pad, array_src_win_read_shape)
# Manage raster mask
if array_src_mask_ds:
# Manage the mask assuming here the same shape as image
# Default value to 0 in order to init as not valid
# Here we init in all cases (array_src_mask_ds given or not)
# array_src_mask_read_buffer_shape = window_shape(array_src_win_marged)
array_src_mask_read_buffer = np.full(
array_src_mask_read_buffer_shape,
Validity.INVALID,
dtype=array_src_mask_dtype,
order="C",
)
DEBUG(
f"Reading source window for source mask "
f"- source window : {array_src_win_read} "
f"- target indices : {indices} ..."
)
mask_buffer_tmp = array_src_mask_ds.read(
array_src_mask_band, window=as_rio_window(array_src_win_read)
)
# We do not use the second element of `array_src_mask_validity_pair`
# It must be given in order to check if we have to force replacement
# even if the input valid value corresponds to `Validity.VALID`
if array_src_mask_validity_pair != (Validity.VALID, Validity.INVALID):
DEBUG("Replacing read mask values to comply with internal convention")
array_replace(
mask_buffer_tmp,
array_src_mask_validity_pair[0],
Validity.VALID,
Validity.INVALID,
)
if mask_buffer_tmp.dtype == np.uint8:
array_src_mask_read_buffer[indices] = mask_buffer_tmp
else:
# Due to previous test it is int8
# In that case we just get a view
array_src_mask_read_buffer[indices] = mask_buffer_tmp.view(np.uint8)
DEBUG(
f"Reading source window for source mask "
f"- source window : {array_src_win_read} "
f"- target indices : {indices} [DONE]"
)
# The grid stores absolute source coordinates. However, when operating
# on a localized sub-region of the raster, we must compensate for the
# relative shift of its origin.
# This calculated offset is then provided to the resampling function,
# which will apply it during the target coordinate to interpolate.
# We avoid modifying the grid in place to prevent unintended side
# effects if it's used concurrently by other processes.
#
# Specifically, 'tile_src_origin' is derived by adjusting for any
# 'tile_pad' (virtual padding) relative to the absolute upper-left
# corner of the read window ('tile_src_win_read'). This value precisely
# defines the origin of the 'tile_read_buffer' in the context of the
# overall 'array_src' coordinate system.
array_src_origin = (
pad[0][0] - array_src_win_read[0][0],
pad[1][0] - array_src_win_read[1][0],
)
# Manage geometry mask
if array_src_geometry_pair is not None:
if array_src_mask_read_buffer is None:
array_src_mask_read_buffer = np.full(
array_src_mask_read_buffer_shape,
Validity.INVALID,
dtype=array_src_mask_dtype,
order="C",
)
array_src_mask_read_buffer[indices] = Validity.VALID
# We have to define the rasterization mesh so that is will be
# aligned with the current raster mask.
# Here the rasterize grid is given by :
# - its origin : `array_src_origin`
# - its shape : `array_src_mask_read_buffer` shape
#
# The `array_src_mask_read_buffer` is defined in all cases
# (with or without `array_src_mask_ds` defined)
# The goal here is to use build_mask in order to merge the
# existing `array_src_mask_read_buffer` with the geometry
# masks
# `array_src_origin` defines the origin of the current sub-array
# in the full array using GridR's internal convention, ie using
# (0, 0) as origin.
#
# ┌────────────────────────────────────────────────────────────┐
# │ WARN : │
# │ │
# │ `array_src_origin` is a negative shift (see definition) │
# │ We need to take its opposite here │
# └────────────────────────────────────────────────────────────┘
# In order to rasterize we have to take care of the
# `array_src_geometry_origin`
cgeometry_origin = -np.asarray(array_src_origin).astype(np.float64)
if array_src_geometry_origin is not None:
cgeometry_origin += array_src_geometry_origin
# ┌────────────────────────────────────────────────────────────┐
# │ NOTE : │
# │ │
# │ The build_mask core method adopts the same convention : │
# │ - Validity.VALID (1) is considered valid │
# │ - Validity.INVALID (0) is considered invalid │
# └────────────────────────────────────────────────────────────┘
# We do not pass the mask_in here as it will currently result in
# non necessary thresholding
# In that case we cant pass the out with the current code as
# it will result in a overwrite of the buffer.
geom_mask = build_mask(
shape=array_src_mask_read_buffer_shape,
resolution=(1, 1),
out=None,
geometry_origin=cgeometry_origin,
geometry_pair=array_src_geometry_pair,
mask_in=None,
mask_in_target_win=None,
mask_in_resolution=None,
oversampling_dtype=None,
mask_in_binary_threshold=None,
rasterize_kwargs=GEOMETRY_RASTERIZE_KWARGS,
init_out=False,
)
if array_src_mask_ds is not None:
array_src_mask_read_buffer[indices] &= geom_mask[indices]
else:
array_src_mask_read_buffer[indices] = geom_mask[indices]
# Resolve mask strategy
# Pre-scan mask : not null or all-valid
array_in_resolve = (
array_src_mask_read_buffer[indices] if array_src_mask_read_buffer is not None else None
)
mask_strategy = resolve_mask_strategy(
interp=interp,
pad=pad,
array_in_mask=array_in_resolve,
boundary_condition=boundary_condition,
trust_padding=trust_padding,
array_in_shape=array_src_win_read_shape,
array_in_mask_safe_win=None,
)
array_in_mask = apply_mask_strategy_chain(
array_in_mask=array_src_mask_read_buffer,
array_in_indices=indices,
pad=pad,
array_in_shape=array_src_win_read_shape,
strategy=mask_strategy,
)
safe_region = mask_strategy.safe_region
# TODO/TOCHECK We may reset the sma_w_array_buffer.array if the cslices
# is limited (not the case for now)
# array_out_mask
array_out_mask = sma_out_mask_buffer.array if sma_out_mask_buffer is not None else None
# For performance we go with check_boundaries is False by default.
# This activate a rust code branch where no explicit tests are performed
# on boundaries. Please note, the rust code will Panic if out of bounds
# index are used.
# check_boundaries = check_boundaries or mask_strategy.check_boundaries
check_boundaries = True
# Note : Safety measure but may not be required
if np.any(pad != 0):
check_boundaries = True
# In case of a B-Spline interpolator, call B-Spline prefiltering first
# Please note the previously margin must integrate the domain extension
# required for the pre-filtering - which is the case if the margins were
# calculated using the interpolator `total_margins()` method.
# Here the mask does not enter the same pre-filtering routine as the
# image. Instead a propagation of invalid data is performed base on
# the interpolator `mask_influence_threshold` parameter.
if is_bspline(interp):
array_bspline_prefiltering(
array_in=array_src_read_buffer, # thats the previously read buffer
array_in_mask=array_in_mask,
interp=interp, # The interpolator
)
if safe_region is not None and array_in_mask is not None:
# update safe_region
safe_region = array_bspline_prefiltering_mask_safe_win(
array_in_mask=array_in_mask,
array_in_mask_safe_win=safe_region,
interp=interp,
)
else:
safe_region = None
# Call the resampling method - this method returns a tuple containing
# the output array and the output mask.
# Here both are returned as None as the buffer are given as input
_ = array_grid_resampling(
interp=interp,
array_in=array_src_read_buffer, # thats the previously read buffer
grid_row=grid_arr[0], # the grid rows
grid_col=grid_arr[1], # the grid columns
grid_resolution=grid_resolution,
array_out=sma_out_buffer.array,
array_out_win=out_win, # dst window in the array_out
nodata_out=nodata_out,
array_in_origin=array_src_origin,
win=oversampled_grid_win, # the production window
array_in_mask=array_in_mask, # the input mask optionnaly prefiltered,
array_in_mask_safe_win=safe_region,
grid_mask=grid_mask_arr, # TO CHECK: Optional[np.ndarray] = None,
grid_mask_valid_value=grid_mask_in_unmasked_value, #: Optional[int] = 1,
grid_nodata=None, # TODO : manage grid_nodata input
array_out_mask=array_out_mask, # output mask buffer,
check_boundaries=check_boundaries,
standalone=False, # We are not in standalone mode
trust_padding=trust_padding, # should not be used with standalone = False
)
if full_nodata:
# Write NODATA
win_slice = window_indices(out_win, reset_origin=False)
# Add the band axis.
win_slice3 = (slice(None, None),) + win_slice
sma_out_buffer.array[win_slice3] = nodata_out
# If mask out - set all to no valid (0)
if sma_out_mask_buffer is not None:
sma_out_mask_buffer.array[win_slice] = 0
[docs]
def basic_grid_resampling_chain(
grid_ds: rasterio.io.DatasetReader,
grid_row_coords_band: int,
grid_col_coords_band: int,
grid_resolution: Tuple[int, int],
array_src_ds: rasterio.io.DatasetReader,
array_src_bands: Union[int, List[int]],
array_out_ds: rasterio.io.DatasetWriter,
interp: InterpolatorIdentifier,
nodata_out: Union[int, float],
grid_col_ds: Union[rasterio.io.DatasetReader, None] = None,
interp_kwargs: Optional[dict] = None,
boundary_condition: Optional[str] = None,
trust_padding: Optional[bool] = False,
win: Optional[np.ndarray] = None,
grid_shift: Optional[Union[Tuple[int, int], Tuple[float, float]]] = None,
array_src_mask_ds: Optional[rasterio.io.DatasetReader] = None,
array_src_mask_band: Optional[int] = None,
array_src_mask_validity_pair: Optional[Tuple[int, int]] = None,
mask_out_ds: Optional[rasterio.io.DatasetWriter] = None,
grid_mask_in_ds: Optional[rasterio.io.DatasetReader] = None,
grid_mask_in_unmasked_value: Optional[int] = None,
grid_mask_in_band: Optional[int] = None,
array_src_geometry_origin: Optional[Tuple[float, float]] = None,
array_src_geometry_pair: Optional[Tuple[Optional[GeometryType], Optional[GeometryType]]] = None,
io_strip_size: int = DEFAULT_IO_STRIP_SIZE,
io_strip_size_target: GridRIOMode = GridRIOMode.INPUT,
ncpu: int = DEFAULT_NCPU,
tile_shape: Optional[Tuple[int, int]] = DEFAULT_TILE_SHAPE,
logger: Optional[logging.Logger] = None,
) -> int:
"""Performs a comprehensive grid-based resampling operation.
This function orchestrates the entire resampling process from input grid
and source raster datasets to an output raster dataset, handling various
masking, interpolation, and I/O strategies. It leverages the
`basic_grid_resampling_array` core method for the actual array processing.
Parameters
----------
grid_ds : rasterio.io.DatasetReader
Input dataset containing the grid coordinates. This dataset provides
the destination geometry for resampling.
grid_row_coords_band : int
Band index in `grid_ds` corresponding to the row coordinates of the
grid.
grid_col_coords_band : int
Band index in `grid_ds` (or `grid_col_ds`) corresponding to the
column coordinates of the grid.
grid_resolution : tuple of int
Resolution of the coarse grid, typically in pixels or map units per
pixel (e.g., ``(10, 10)``).
array_src_ds : rasterio.io.DatasetReader
The source dataset from which raster data will be read and resampled.
array_src_bands : int or list of int
Band index or list of band indices to read from `array_src_ds`.
array_out_ds : rasterio.io.DatasetWriter
Output dataset where the resampled raster data will be written.
interp: InterpolatorIdentifier
The interpolator identifier to use. It can be:
- A string representing the interpolator name (e.g., "nearest", "linear"
, "cubic", "bspline3", "bspline11", etc.).
- A `PyInterpolatorType` enum value.
- An instance of an interpolator class.
See `gridr.core.interp.interpolator` for further details
nodata_out : scalar
NoData value to fill the output raster when no valid data points
can be found for a given output pixel.
grid_col_ds : rasterio.io.DatasetReader or None, optional
Optional separate dataset for grid column coordinates if they are not
in `grid_ds`. Defaults to None.
interp_kwargs : Any, default None
Optional keyword parameters that will be passed for the interpolator
creation to the `get_interpolator` function. They will be used if the
interpolator passed through the `interp` is either of type `str` or
`PyInterpolatorType`.
boundary_condition : str or None, default None
Optional padding mode when required data for interpolation lies
outside the source dataset domain. Available modes :
- 'constant' (0)
Pads with a constant value (0).
-'edge'
Pads with the edge values of array.
- 'reflect'
Pads with the reflection of the vector mirrored on
the first and last values of the vector along each
axis.
- 'symmetric'
Pads with the reflection of the vector mirrored
along the edge of the array.
- 'wrap'
Pads with the wrap of the vector along the axis.
The first values are used to pad the end and the
end values are used to pad the beginning.
- ``None`` Zero padding is applied. If insufficient data is available for
interpolation, those regions will be marked as invalid in the mask. The
behaviour is different from `constant` : with that mode, the padded zone
can be either trusted or not.
Uses a GridR-specific in-place padding implementation instead of
``numpy.pad`` to avoid unnecessary memory allocation.
For the mask, the padded zone fill value is determined by
``trust_padding``: ``Validity.VALID`` if trusted, ``Validity.INVALID``
otherwise. If a boundary condition is set and ``trust_padding`` is
``True``, the mask is padded using the same boundary condition.
trust_padding : bool or None, default False
Controls how the padded zone is marked in the mask when padding
is applied. If ``True`` and a ``boundary_condition`` is set, the
padded mask zone is considered valid. If ``False``, the padded zone
is always marked as``Validity.INVALID`` regardless of the boundary
condition.
Forwarded to ``basic_grid_resampling_array``.
Must be explicitly set to ``True`` or ``False`` when
``boundary_condition`` is not ``None`` -- passing ``None`` in that
case raises a ``ValueError``. Ignored when ``boundary_condition``
is ``None``.
win : numpy.ndarray, optional
Optional output window of the `grid_ds` to process, defined as
``[[row_start, row_end], [col_start, col_end]]``. This defines the
region of interest for the resampling. If None, the full grid extent
is considered.
Defaults to None.
grid_shift: tuple of int or tuple of float, optional
Optional shift vector applied to all grid coordinates, expressed in the
source image coordinate system. The first component is applied to row
coordinates and the second to column coordinates.
The parameter allows adjustement of the pixel-center convention relative
to that used by GridR during resampling - for example, to switch between
half-pixel and whole-pixel coordinate conventions, the latter being the
one used by GridR.
array_src_mask_ds : rasterio.io.DatasetReader or None, optional
Optional dataset representing the mask associated with `array_src_ds`.
Defaults to None.
array_src_mask_band : int or None, optional
Band index to read from `array_src_mask_ds` for the source mask.
Defaults to None.
array_src_mask_validity_pair : tuple of int, optional
A tuple containing two integer :
- The first integer corresponds to the value to consider as valid in
the mask array.
- The second integer corresponds to the value to consider as invalid
in the mask array.
If the tuple differs from (`Validity.VALID`, `Validity.INVALID`) a
replace operation will be performed in order to make the mask compliant
with the core resampling method.
mask_out_ds : rasterio.io.DatasetWriter
Output dataset where the resampled validity mask will be written.
This mask indicates which output pixels contain valid resampled data.
grid_mask_in_ds : rasterio.io.DatasetReader or None, optional
Optional input dataset for the grid mask. This mask can define valid
areas within the grid itself. Defaults to None.
grid_mask_in_unmasked_value : int or None, optional
Value in `grid_mask_in_ds` that represents a valid/unmasked data point.
Defaults to None.
grid_mask_in_band : int or None, optional
Band index to read from `grid_mask_in_ds` for the input grid mask.
Defaults to None.
array_src_geometry_origin : tuple of float or None, optional
Specifies the origin convention for `array_src_geometry_pair` definition.
GridR uses a ``(0, 0)`` image coordinate system to address the first
pixel of the source raster. This parameter aligns the geometry
definition with GridR's convention. Defaults to None.
array_src_geometry_pair : tuple of (GeometryType or None), optional
A tuple containing two optional `GeometryType` elements:
- The first element: Represents the **valid** geometries.
- The second element: Represents the **invalid** geometries.
If provided, a rasterization of those geometries is
performed locally on the current `array_src` raster window. This
generated mask is then merged with any additional raster mask supplied
via the `array_src_mask_ds` dataset. The rasterization itself is
delegated to the `build_mask` gridr's core method. Defaults to None.
io_strip_size : int, optional
The number of rows per chunk for I/O operations. This parameter
optimizes memory usage and processing speed by dividing the input
and output operations into manageable strips. Defaults to
`DEFAULT_IO_STRIP_SIZE`.
io_strip_size_target : GridRIOMode, optional
Defines how `io_strip_size` is applied, e.g., to input or output
strips. Defaults to `GridRIOMode.INPUT`.
ncpu : int, optional
Number of CPU cores to use for parallel processing. Defaults to
`DEFAULT_NCPU`.
tile_shape : tuple of int or None, optional
Shape ``(rows, cols)`` for internal processing tiles within strips,
optimizing cache usage. Defaults to `DEFAULT_TILE_SHAPE`.
logger : logging.Logger or None, optional
Logger instance for debugging and informational messages. If None,
a default logger is initialized internally. Defaults to None.
Returns
-------
int
Returns 1 upon successful completion of the resampling process.
A return value other than 1 indicates an error.
Notes
-----
This function manages the reading of input data in chunks (strips),
calls the `basic_grid_resampling_array` method for processing each chunk,
and then writes the results to the output datasets.
The method handles grid data from one or two separate datasets for row and
column coordinates. It also incorporates masking capabilities for both
the input grid and the source array, allowing for flexible data validity
management.
The `win` parameter is crucial for defining the specific output region
to be processed, enabling partial grid resampling without loading the
entire dataset into memory.
"""
if logger is None:
logger = logging.getLogger(__name__)
logger.info(f"Using shared-array backend: {get_backend()}")
if boundary_condition is not None and trust_padding is None:
raise ValueError(
"'trust_padding' must be explicitly set when "
"'boundary_condition' is not None, got None"
)
# Init a list to register SharedArray buffers
sma_buffer_list = []
register_sma = partial(create_and_register, register=sma_buffer_list)
# Get input shape from input grid
grid_nrow, grid_ncol = grid_ds.height, grid_ds.width
logger.debug(f"Grid shape : {grid_nrow} rows x {grid_ncol} columns")
grid_row_ds = grid_ds
if grid_col_ds is None:
grid_col_ds = grid_ds
assert grid_row_coords_band != grid_col_coords_band
else:
# Test shapes are the same
assert grid_nrow == grid_col_ds.height
assert grid_ncol == grid_col_ds.width
if grid_mask_in_ds is not None:
grid_mask_nrow, grid_mask_ncol = grid_mask_in_ds.height, grid_mask_in_ds.width
logger.debug(f"Grid mask shape : {grid_mask_nrow} rows x " f"{grid_mask_ncol} columns")
logger.debug(f"Grid mask unmasked value : {grid_mask_in_unmasked_value}")
assert grid_nrow == grid_mask_nrow
assert grid_ncol == grid_mask_ncol
assert grid_mask_in_unmasked_value is not None
assert grid_mask_in_unmasked_value >= 0
logger.debug(f"Grid mask dtype : {np.dtype(grid_mask_in_ds.dtypes[grid_mask_in_band-1])}")
# assert(np.dtype(grid_mask_in_ds.dtypes[grid_mask_in_band-1]) == np.dtype('uint8'))
else:
logger.debug("Grid mask : no input grid mask")
# Check that optional input array mask has the same shape as the source
# array
if array_src_mask_ds is not None:
src_shape = (array_src_ds.width, array_src_ds.height)
mask_shape = (array_src_mask_ds.width, array_src_mask_ds.height)
if src_shape != mask_shape:
raise ValueError(
f"The shape of 'array_src_mask_ds' {src_shape}"
f"does not match the shape of 'array_src_ds' {mask_shape}"
)
# Cut in strip chunks
# io_strip_size is computed for the output grid but it can either be piloted
# by setting a target size for the read buffer.
if io_strip_size not in [0, None]:
if io_strip_size_target == GridRIOMode.INPUT:
# We have to take into account the grid's resolution along rows
io_strip_size = io_strip_size * grid_resolution[0]
elif io_strip_size_target == GridRIOMode.OUTPUT:
# The strip_size is directly given for the target output
pass
else:
raise ValueError(
f"Not recognized value {io_strip_size_target} for "
"the 'io_strip_size_target' argument"
)
logger.debug(f"Computed strip size (number of rows) : {io_strip_size}")
# Compute the full output shape - used if no window
full_shape_out = grid_full_resolution_shape(
shape=(grid_nrow, grid_ncol), resolution=grid_resolution
)
logger.debug(
f"Computed full output shape : {full_shape_out[0]} rows x {full_shape_out[1]}" " columns"
)
# Create an array profile for full output in order to use window utils
full_profile_out = ArrayProfile(
shape=full_shape_out, ndim=2, dtype=np.dtype(array_out_ds.dtypes[0])
)
# The window is given in target geometry - if None we set it to full grid
# The window contains the first and last index for each axis
if win is None:
logger.debug("No window given : the full grid is considered as window")
win = np.array(((0, full_shape_out[0] - 1), (0, full_shape_out[1] - 1)))
# If the window is given we have to check that it lies in the grid
elif not window_check(full_profile_out, win):
raise Exception("The given 'window' is outside the grid domain of " "definition.")
logger.debug(f"Window : {win}")
shape_out = win[:, 1] - win[:, 0] + 1
logger.debug(f"Shape out : {shape_out}")
# Compute strips definitions
# TODO : MAKE SURE HERE THAT GRID CHUNKS COVER AT LEAST 2 ROWS IF RESOLUTION > 1
chunk_boundaries = chunks.get_chunk_boundaries(
nsize=shape_out[0], chunk_size=io_strip_size, merge_last=False
)
# First convert chunks to target windows
# Please note that the last coordinate in each chunk is not contained
# in the chunk (it corresponds to the index), whereas the window
# definition contains index that are in the window
# Notes :
# - the shape_out corresponds to the production window shape
# - we shift the chunk_windows of the window origin
chunk_windows = [
np.array([[c0, c1 - 1], [0, shape_out[1] - 1]]) + win[:, 0].reshape(-1, 1)
for c0, c1 in chunk_boundaries
]
# Compute the window to read for each chunk window.
# This will returns both the window to read, and the relative window
# corresponding to the target chunk window.
# The `grid_resolution_window_safe` ensures for each dimension where
# resolution > 1, that stop - start + 1 >= 2 for the read window.
# This is required by the resampling method in order to interpolate the
# grid values.
chunk_windows_read = [
grid_resolution_window_safe(
resolution=grid_resolution, win=chunk_win, grid_shape=(grid_nrow, grid_ncol)
)
for chunk_win in chunk_windows
]
# Determine the read buffer shape for the grid
read_grid_buffer_shape = np.max(
np.asarray([window_shape(read_win) for read_win, rel_win in chunk_windows_read]), axis=0
)
read_grid_buffer_shape3 = np.insert(read_grid_buffer_shape, 0, 2)
logger.debug(f"Read grid buffer shape : {read_grid_buffer_shape}")
logger.debug(f"Read grid buffer shape 3 : {read_grid_buffer_shape3}")
# Create shared memory array for read input grid.
sma_r_buffer_grid = register_sma(
read_grid_buffer_shape3, grid_row_ds.dtypes[grid_row_coords_band - 1]
)
# TO_CHECK
# Create shared memory array for grid mask
sma_in_buffer_grid_mask = None
if grid_mask_in_ds is not None:
sma_in_buffer_grid_mask = register_sma(
read_grid_buffer_shape, grid_mask_in_ds.dtypes[grid_mask_in_band - 1]
)
# Get array profile for input array
try:
array_src_bands[0]
except TypeError:
array_src_bands = [array_src_bands]
# Determine the write buffer shape
# The computation is performed using the chunk_windows.
# - nrow x ncol : from max full res grid strip size
# - ndim : from the number of bands to process
write_buffer_shape2 = np.max(
np.asarray([window_shape(chunk_win) for chunk_win in chunk_windows]), axis=0
)
write_buffer_shape = np.insert(write_buffer_shape2, 0, len(array_src_bands))
# Create shared memory array for output
# The core resampling method currently supports float64 buffers only.
# If the output dataset has a type other than float64, we create two buffers:
# - The first buffer holds the computation in float64.
# - The second buffer maintains the target dtype and is written to disk.
array_out_ds_dtype = np.dtype(array_out_ds.dtypes[0])
# `sma_w_array_buffer_convert` determines if conversion is needed
# - If the buffer is None, no conversion is required
# - If the buffer is defined, conversion is required and it will contain the converted data
# ready for writing
sma_w_array_buffer_convert = None
logger.debug(f"Create float64 computation array buffer with shape {write_buffer_shape}")
sma_w_array_buffer = register_sma(write_buffer_shape, np.float64)
logger.debug(f"Create float64 computation array buffer with shape {write_buffer_shape} DONE")
if array_out_ds_dtype != np.dtype("float64"):
logger.debug(f"Create write array buffer with shape {write_buffer_shape}")
sma_w_array_buffer_convert = register_sma(write_buffer_shape, array_out_ds_dtype)
logger.debug(f"Create write array buffer with shape {write_buffer_shape} DONE")
# Manage output mask
sma_w_mask_buffer = None
if mask_out_ds is not None:
mask_out_dtype = np.dtype(mask_out_ds.dtypes[0])
# Create shared memory array for output mask
logger.debug(f"Create write mask buffer with shape {write_buffer_shape2}")
sma_w_mask_buffer = register_sma(write_buffer_shape2, mask_out_dtype)
logger.debug(f"Create write mask buffer with shape {write_buffer_shape2} DONE")
# Setting the interpolator
if interp_kwargs is None:
interp = get_interpolator(interp)
else:
interp = get_interpolator(interp, **interp_kwargs)
# Interpolator internal initialization
logger.debug(f"Initializing interpolator {interp.shortname()}")
interp.initialize()
# Computing total required margins
# (top, bottom, left, right)
margin = np.asarray(interp.total_margins()).reshape((2, 2))
logger.debug(f"Required margins for interpolator {interp.shortname()} : {margin}")
try:
for chunk_idx, (chunk_win, (win_read, win_rel)) in enumerate(
zip(chunk_windows, chunk_windows_read, strict=True)
):
logger.debug(f"Chunk {chunk_idx} - chunk_win: {chunk_win}")
# Compute current strip chunk parameters
# - cshape : current strip output buffer shape
# - cread_shape : current strip read buffer shape (input mask)
# - cread_rows_arr : current strip array containing the read data
# for the grid rows
# - cread_cols_arr : current strip array containing the read data
# for the grid columns
# - cread_mask_arr : current strip array containing the read data
# for the input mask if given
# - cslices : current strip slices to adress the output buffer whose
# origin corresponds to the origin of the current strip
# - cslices_as_win : current strip slices converted to a window
# - cslices_write : current strip slice to adress the whole output
# dataset for IO write operation
cshape = window_shape(chunk_win)
cslices = window_indices(chunk_win, reset_origin=True)
cslices_as_win = window_from_indices(cslices, cshape)
cslices3 = (slice(None, None),) + cslices
# Define the target positioning window `cstrip_target_win` to write to
# disk. We have to revert back the shift of the production window.
cstrip_target_win = chunk_win - win[:, 0].reshape(-1, 1)
# read the grid data
cread_shape = window_shape(win_read)
# First row and col grids
_ = grid_row_ds.read(
grid_row_coords_band,
window=as_rio_window(win_read),
out=sma_r_buffer_grid.array[0, 0 : cread_shape[0], 0 : cread_shape[1]],
)
_ = grid_col_ds.read(
grid_col_coords_band,
window=as_rio_window(win_read),
out=sma_r_buffer_grid.array[1, 0 : cread_shape[0], 0 : cread_shape[1]],
)
cread_grid_arr = sma_r_buffer_grid.array[:, 0 : cread_shape[0], 0 : cread_shape[1]]
assert cread_grid_arr[0].flags.c_contiguous
assert cread_grid_arr[1].flags.c_contiguous
# Read the grid mask data if given
cread_grid_mask_arr = None
if sma_in_buffer_grid_mask is not None:
_ = grid_mask_in_ds.read(
grid_mask_in_band,
window=as_rio_window(win_read),
out=sma_in_buffer_grid_mask.array[0 : cread_shape[0], 0 : cread_shape[1]],
)
cread_grid_mask_arr = sma_in_buffer_grid_mask.array[
0 : cread_shape[0], 0 : cread_shape[1]
]
logger.debug(f"Chunk {chunk_idx} - shape : {cshape}")
logger.debug(f"Chunk {chunk_idx} - grid read shape : {cread_shape}")
logger.debug(f"Chunk {chunk_idx} - buffer slices : {cslices}")
logger.debug(f"Chunk {chunk_idx} - buffer slices as win : {cslices_as_win}")
logger.debug(f"Chunk {chunk_idx} - target write window : {cstrip_target_win}")
logger.debug(f"Chunk {chunk_idx} - resampling starts...")
# Apply shift on grid coordinates
# This is performed in place using core.grid_utils.array_shift_grid_coordinates
if grid_shift is not None:
array_shift_grid_coordinates(
grid_row=cread_grid_arr[0],
grid_col=cread_grid_arr[1],
grid_shift=grid_shift,
win=None,
grid_mask=cread_grid_mask_arr,
grid_mask_valid_value=grid_mask_in_unmasked_value,
grid_nodata=None,
)
# TO_CHECK
# cin_grid_mask = sma_in_buffer_grid_mask.array[0:cread_shape[0], 0:cread_shape[1]]
# If no mask is given : set it to 1
# cin_grid_mask = None
# cin_grid_mask[:,:] = 1
# Check against image in dimension - here we will have to take care of origin convention
# For now let keep it simple and assume it that 0 is the pixel center (and not 0.5)
# TODO : replace by a less memory consuming code : each test expression generate a
# temporary array
# No mask provided – we must at least ensure that addressed
# coordinates lie within the domain bounds.
# Oversampled grids pose a challenge: masking a single point may
# invalidate surrounding interpolated values, which can
# unintentionally exclude otherwise valid points.
# A better approach is to invalidate only those points outside the
# domain that have no valid neighbors within it.
# TODO: Implement masking for points outside the domain, ensuring
# consistency with the interpolation strategy described above.
# if cin_grid_mask is None:
# cin_grid_mask = sma_in_buffer_grid_mask.array[0:cread_shape[0], 0:cread_shape[1]]
# cin_grid_mask[:,:] = 1
# cin_grid_mask[ np.logical_or(
# np.logical_or(
# cread_grid_arr[0] < 0., cread_grid_arr[0] > array_src_ds.height - 1.
# ),
# np.logical_or(
# cread_grid_arr[1] < 0., cread_grid_arr[1] > array_src_ds.width - 1.
# )
# )] = 0
# grid_mask_in_unmasked_value = 1
if tile_shape is not None:
# Cut strip shape into tiled chunks.
logger.debug(
f"Chunk {chunk_idx} - Tiled processing with tiles of {tile_shape[0]} x "
f"{tile_shape[1]}"
)
chunk_tiles = chunks.get_chunk_shapes(cshape, tile_shape, merge_last=False)
logger.debug(
f"Chunk {chunk_idx} - Number of tiles to process :" f" {len(chunk_tiles)}"
)
for ctile in chunk_tiles:
logger.debug(f"Chunk {chunk_idx} - tile {ctile} " "- preparing args...")
# Compute current strip chunk parameters to pass to the
# 'build_mask_tile_worker' ('build_mask' wrapper) method
# - ctile_origin : the tile origin corresponds here to the
# coordinates relative to the current strip, ie the
# first element of each window
# - ctile_win : the window corresponding to the tile (convert
# the chunk index convention to the window index
# convention ; with no origin shift) relative to
# the current chunk
# - ctile_grid_win : the full-resolution window within the
# current chunk's grid that corresponds to the active
# tile.
ctile_origin = chunk_win[..., 0]
ctile_win = window_from_chunk(chunk=ctile, origin=None)
# Calculate the full-resolution window within the current
# chunk's grid that corresponds to the active tile. The tile
# window (`ctile_win`) is relative to the current chunk;
# therefore, we apply a relative shift (`win_rel`) to
# `ctile_win` to obtain the window aligned with the chunk's
# grid array, suitable for `basic_grid_resampling_array`.
ctile_grid_win = window_shift(ctile_win, np.asarray(win_rel[:, 0]))
logger.debug(
f"Chunk {chunk_idx} - tile {ctile} - "
f"tile's chunk origin : {ctile_origin}"
)
logger.debug(
f"Chunk {chunk_idx} - tile {ctile} - " f"tile window : {ctile_win}"
)
logger.debug(
f"Chunk {chunk_idx} - tile {ctile} - "
f"tile full-resolution window within the chunk's "
f"grid : {ctile_grid_win}"
)
basic_grid_resampling_array(
interp=interp,
grid_arr=cread_grid_arr,
grid_arr_shape=cread_shape,
grid_resolution=grid_resolution,
grid_nodata=None, # TODO
grid_mask_arr=cread_grid_mask_arr,
grid_mask_in_unmasked_value=grid_mask_in_unmasked_value,
array_src_ds=array_src_ds,
array_src_bands=array_src_bands,
array_src_mask_ds=array_src_mask_ds,
array_src_mask_band=array_src_mask_band,
array_src_mask_validity_pair=array_src_mask_validity_pair,
array_src_geometry_origin=array_src_geometry_origin,
array_src_geometry_pair=array_src_geometry_pair,
oversampled_grid_win=ctile_grid_win,
margin=margin,
sma_out_buffer=sma_w_array_buffer,
out_win=ctile_win,
nodata_out=nodata_out,
boundary_condition=boundary_condition,
trust_padding=trust_padding,
sma_out_mask_buffer=sma_w_mask_buffer,
logger_msg_prefix=f"Chunk {chunk_idx} - tile {ctile} - ",
logger=logger,
)
else:
# Resampling on full strip - no tiling
logger.debug(f"Chunk {chunk_idx} - Full strip computation " "(no tiling)")
basic_grid_resampling_array(
interp=interp,
grid_arr=cread_grid_arr,
grid_arr_shape=cread_shape,
grid_resolution=grid_resolution,
grid_nodata=None, # TODO
grid_mask_arr=cread_grid_mask_arr,
grid_mask_in_unmasked_value=grid_mask_in_unmasked_value,
array_src_ds=array_src_ds,
array_src_bands=array_src_bands,
array_src_mask_ds=array_src_mask_ds, # TODO
array_src_mask_band=array_src_mask_band, # TODO
array_src_mask_validity_pair=array_src_mask_validity_pair,
array_src_geometry_origin=array_src_geometry_origin,
array_src_geometry_pair=array_src_geometry_pair,
oversampled_grid_win=win_rel,
margin=margin,
sma_out_buffer=sma_w_array_buffer,
out_win=cslices_as_win,
nodata_out=nodata_out,
boundary_condition=boundary_condition,
trust_padding=trust_padding,
sma_out_mask_buffer=sma_w_mask_buffer,
logger_msg_prefix=f"Chunk {chunk_idx} - ",
logger=logger,
)
# Write full chunk at once
logger.debug(f"Chunk {chunk_idx} - write for full strip...")
# Manage output data type conversion
if sma_w_array_buffer_convert is None:
array_out_ds.write(
sma_w_array_buffer.array[cslices3], window=as_rio_window(cstrip_target_win)
)
else:
# We have to convert the data from float64 (computation type) to output type
logger.debug(f"Chunk {chunk_idx} - convert data type for full strip")
rounding_method = "round" if array_out_ds_dtype.kind in "iu" else None
# TODO : limit data dtype conversion to cslices3
array_convert(
sma_w_array_buffer.array,
sma_w_array_buffer_convert.array,
clip="auto",
rounding_method=rounding_method,
)
logger.debug(f"Chunk {chunk_idx} - write converted data for full strip...")
array_out_ds.write(
sma_w_array_buffer_convert.array[cslices3],
window=as_rio_window(cstrip_target_win),
)
logger.debug(f"Chunk {chunk_idx} - write ends.")
# Set mask to no valid
if mask_out_ds is not None:
logger.debug(f"Chunk {chunk_idx} - write mask for full strip...")
mask_out_ds.write(
sma_w_mask_buffer.array[cslices], 1, window=as_rio_window(cstrip_target_win)
)
logger.debug(f"Chunk {chunk_idx} - write mask ends.")
except Exception:
raise
finally:
# Release the Shared memory buffer
SharedArray.clear_buffers(sma_buffer_list)
return 1