Source code for gridr.core.utils.array_utils

# coding: utf8
#
# Copyright (c) 2025 Centre National d'Etudes Spatiales (CNES).
#
# This file is part of GRIDR
# (see https://github.com/CNES/gridr).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Array utils module
"""
# pylint: disable=C0413
import sys
from typing import Any, Literal, NoReturn, Optional, Tuple, Union

import numpy as np
import rasterio
from gridr.cdylib import (
    PyArrayWindow2,
    py_array1_replace_f32_i8,
    py_array1_replace_f32_u8,
    py_array1_replace_f64_i8,
    py_array1_replace_f64_u8,
    py_array1_replace_i8,
    py_array1_replace_u8,
)

PY311 = sys.version_info >= (3, 11)

if PY311:
    from typing import Self  # noqa: E402, F401
else:
    from typing_extensions import Self  # noqa: E402, F401
# pylint: enable=C0413


[docs] def array_replace( array: np.ndarray, val_cond: Union[int, float], val_true: Union[int, float], val_false: Union[int, float], array_cond: Optional[np.ndarray] = None, array_cond_val: Optional[Union[int, float]] = None, win: Optional[np.ndarray] = None, ) -> NoReturn: """Replaces elements within an array in-place based on specified conditions. This method is a Python wrapper around the Rust function `py_array1_replace_*`, designed for efficient in-place modification of NumPy arrays. It allows conditional replacement based on either the `array` itself or an optional `array_cond` (condition array). Parameters ---------- array : numpy.ndarray The array into which elements are replaced in-place. Must be C-contiguous and have a `dtype` of `int8`, `uint8`, `float32`, or `float64`. val_cond : int or float The primary condition value. Elements in `array` (or `array_cond` if provided) equal to `val_cond` will be affected. val_true : int or float The value to replace elements that satisfy the condition (i.e., input value equals `val_cond`). val_false : int or float The value to replace elements that do *not* satisfy the condition (i.e., input value does not equal `val_cond`). array_cond : numpy.ndarray, optional An optional 1D or 2D array on which to apply the condition. If provided, the replacement in `array` is based on the values in `array_cond`. Must have a `dtype` of `int8` or `uint8`. Defaults to `None`, in which case the `array` itself is used for the condition. array_cond_val : int or float, optional The condition value to use if `array_cond` is defined. This value is compared against elements in `array_cond`. This parameter is required if `array_cond` is provided. Defaults to `None`. win : numpy.ndarray, optional A window `win` to restrict the operation to a specific region of the `array`. This is a 2D NumPy array where each row represents a dimension and contains `(min_idx, max_idx)`. **Both `min_idx` and `max_idx` are inclusive**, adhering to the GridR's "window" convention. The window's dimensions must match those of the `array`. Defaults to `None`, meaning the operation applies to the entire array. Returns ------- NoReturn This function modifies the `array` in-place and does not return any value. Raises ------ AssertionError If `array`'s `dtype` is not one of `int8`, `uint8`, `float32`, or `float64`. AssertionError If `array` is not C-contiguous (`array.flags.c_contiguous` is `False`). AssertionError If `array_cond` is provided and its `dtype` is not `int8` or `uint8`. AssertionError If `array_cond` is provided but `array_cond_val` is `None`. AssertionError If `array_cond_val` is provided but `array_cond` is `None`. """ assert array.dtype in (np.int8, np.uint8, np.float32, np.float64) assert array.flags.c_contiguous is True if array_cond is not None: assert array_cond.dtype in (np.int8, np.uint8) assert array_cond_val is not None if array_cond_val is not None: assert array_cond is not None py_window = None if win is not None: py_window = PyArrayWindow2( start_row=win[0][0], end_row=win[0][1], start_col=win[1][0], end_col=win[1][1] ) nrow, ncol = array.shape array = array.reshape(-1) if array_cond is not None: array_cond = array_cond.reshape(-1) py_array_replace_func = { (np.dtype("int8"), np.dtype("int8")): py_array1_replace_i8, (np.dtype("float32"), np.dtype("int8")): py_array1_replace_f32_i8, (np.dtype("float64"), np.dtype("int8")): py_array1_replace_f64_i8, (np.dtype("uint8"), np.dtype("uint8")): py_array1_replace_u8, (np.dtype("float32"), np.dtype("uint8")): py_array1_replace_f32_u8, (np.dtype("float64"), np.dtype("uint8")): py_array1_replace_f64_u8, } if array_cond is not None: py_array_replace_func[(array.dtype, array_cond.dtype)]( array, nrow, ncol, val_cond, val_true, val_false, array_cond, array_cond_val, py_window ) else: py_array_replace_func[(array.dtype, array.dtype)]( array, nrow, ncol, val_cond, val_true, val_false, None, None, py_window )
[docs] def is_clip_required(in_dtype: np.dtype, out_dtype: np.dtype) -> bool: """ Determines if clipping is required when converting between data types. Args: in_dtype: The source data type out_dtype: The destination data type Returns: bool: True if clipping is required, False otherwise Raises: ValueError: If the conversion between types is not managed """ in_dtype_info = np.iinfo(in_dtype) if in_dtype.kind in "iu" else np.finfo(in_dtype) out_dtype_info = np.iinfo(out_dtype) if out_dtype.kind in "iu" else np.finfo(out_dtype) in_min, in_max = np.float64(in_dtype_info.min), np.float64(in_dtype_info.max) out_min, out_max = np.float64(out_dtype_info.min), np.float64(out_dtype_info.max) match (in_dtype.kind, out_dtype.kind): # Cases where we need to check if source values fit in destination range case ("f", "f") | ("i", "f") | ("i", "i") | ("f", "i"): clip_required = in_min < out_min or in_max > out_max # Cases where clipping is always required case ("f", "u") | ("i", "u"): clip_required = True # Unsigned to unsigned conversion case ("u", "u") | ("u", "i") | ("u", "f"): clip_required = in_max > out_max case _: raise ValueError(f"Conversion from {in_dtype} to {out_dtype} is not managed") return clip_required
[docs] def is_clip_to_dtype_limits_safe(in_dtype: np.dtype, out_dtype: np.dtype) -> bool: """ Determines whether clipping from an input type to the type limits of a target data type is safe. This function checks if converting from a data type to a target data type and clipping it to the target type's limits will preserve all values without overflow. Parameters ---------- in_dtype : np.dtype The input type to be checked for safe clipping. out_dtype : np.dtype The target data type to which the array would be converted. Returns ------- bool True if clipping to the target type limits is safe (no overflow will occur and the target limit can be expressed in the input data type with precision), False otherwise. Notes ----- This function is necessary when performing type conversions between different numerical data types, especially when converting between floating-point and integer types or between different floating-point precisions. The main concern is to prevent overflow when clipping values to the target type's limits. The function performs the following checks: 1. Only floating-point input types are considered for this check (integer inputs are assumed to be safe by default). 2. Checks if clipping is actually required between the input and output types. 3. Attempts to convert the maximum value of the output type to the input type and convert it back to the output type. 4. If this conversion results in an OverflowError or if the converted value doesn't match the expected maximum value of the output type, returns False. The function is particularly important when processing numerical data where preserving the integrity of values is critical, such as in scientific computing, financial applications, or any domain where numerical precision matters. The test is only performed on the max as the min of an integer type is a power of 2 and can safely expressed when clipping is required. """ is_safe = True # The test has only to be done considering float inputs if in_dtype.kind == "f" and is_clip_required(in_dtype, out_dtype): type_info = None if out_dtype.kind == "f": type_info = np.finfo(out_dtype) else: type_info = np.iinfo(out_dtype) try: is_safe = out_dtype.type(in_dtype.type(type_info.max)) == type_info.max except OverflowError: is_safe = False return is_safe
[docs] def array_convert( array_in: np.ndarray, array_out: np.ndarray, clip: Union[Literal["auto"], Tuple[Any, Any]] = "auto", safe: bool = True, rounding_method: Optional[Literal["round", "ceil", "floor"]] = "round", ) -> None: """ Convert an input array to a target dtype with optional clipping and rounding. The function performs type conversion from the input array to the output array, with optional clipping and rounding operations based on the specified parameters. Parameters ---------- array_in : numpy.ndarray The input array containing the data to convert. The dtype of this array determines the source data type. array_out : numpy.ndarray The output array that will contain the converted data. The dtype of this array determines the target data type. clip : Union[Literal['auto'], Tuple[Any, Any]]] Controls value clipping behavior: - 'auto': Automatic clipping based on the target dtype's range - Tuple: Specific numeric range for clipping (e.g., (min_value, max_value)). The tuple values should correspond to the target dtype's range safe : bool If True, check are performed to garanty that the automatic clipping can be performed without overflow caused by floating point precision loss. rounding_method : Optional[Literal['round', 'ceil', 'floor']], optional Specifies the rounding method to use when converting from float to integer: - 'round': Standard rounding (default) - 'ceil': Round up to nearest integer - 'floor': Round down to nearest integer - None: No rounding is performed Returns ------- None The function modifies both the input array and the output array inplace and returns nothing. Notes ----- - The function checks if clipping is required based on the input and output dtypes and the specified clipping range. - When 'auto' clipping is specified, the function automatically clips to the range of the output dtype. - The function currently raises an Exception if clipping from input type to output type is not safe. - The function raises ValueError if the specified clipping range is invalid for the output dtype. - No safety check is performed when the clipping range is not automatic """ in_dtype = array_in.dtype out_dtype = array_out.dtype # Get output dtype min and max codable values. out_dtype_info = np.iinfo(out_dtype) if out_dtype.kind in "iu" else np.finfo(out_dtype) out_dtype_range = (out_dtype_info.min, out_dtype_info.max) # Determines from types if clip is required clip_required = is_clip_required(in_dtype, out_dtype) # Define clip range clip_range = None if clip == "auto" and clip_required: clip_range = (out_dtype_range[0], out_dtype_range[1]) if safe and not is_clip_to_dtype_limits_safe(in_dtype, out_dtype): raise Exception( f"Clipping to output dtype limits is not safe from {in_dtype} to {out_dtype}" ) elif clip != "auto": clip_range = clip if min(clip_range) < out_dtype_info.min or max(clip_range) > out_dtype_info.max: raise ValueError(f"Clipping range {clip_range} is not allowed on type {out_dtype}") match rounding_method: case "round": np.round(array_in, decimals=0, out=array_in) case "ceil": np.ceil(array_in, out=array_in) case "floor": np.floor(array_in, out=array_in) case _: pass if clip_range is not None: clip_range_in = (in_dtype.type(clip_range[0]), in_dtype.type(clip_range[1])) np.clip(array_in, a_min=clip_range_in[0], a_max=clip_range_in[1], out=array_in) overflow = (array_in < clip_range[0]) | (array_in > clip_range[1]) if np.any(overflow): raise Exception(f"Some value(s) are out of {out_dtype} range after clipping") # raise Exception(array_in) array_out[:] = array_in else: array_out[:] = array_in
[docs] class ArrayProfile(object): """ A class to define array attributes for mocking or descriptive purposes. This class is designed to hold essential attributes of a NumPy array, such as its shape, number of dimensions, data type, and total size, allowing access to these members similar to a `numpy.ndarray` object without instantiating a full array. """ def __init__(self, shape: Tuple[int, ...], ndim: int, dtype: np.dtype): """ Initializes an `ArrayProfile` object. Parameters ---------- shape : tuple of int The shape of the array, e.g., `(rows, cols, bands)`. ndim : int The number of dimensions of the array. dtype : numpy.dtype The data type of the array's elements, e.g., `np.int16`, `np.float32`. Returns ------- None This constructor initializes the object's attributes. """ self.ndim = ndim self.shape = shape self.dtype = dtype self.size = np.prod(self.shape)
[docs] @classmethod def from_dataset(cls, ds: rasterio.io.DatasetReader) -> Self: """ Creates an `ArrayProfile` object from a `rasterio.io.DatasetReader`. This class method extracts relevant array attributes (shape, number of dimensions, and data type) from an opened Rasterio dataset. It adjusts the `ndim` and `shape` for single-band datasets to represent them as 2D arrays, consistent with typical image processing. Parameters ---------- ds : rasterio.io.DatasetReader A Rasterio dataset reader object, typically obtained via ` rasterio.open()`. Returns ------- ArrayProfile An instantiated `ArrayProfile` object populated with attributes derived from the provided Rasterio dataset. """ shape = (ds.count, ds.height, ds.width) ndim = 3 if ds.count == 1: shape = (ds.height, ds.width) ndim = 2 return cls(shape=shape, ndim=ndim, dtype=np.dtype(ds.profile["dtype"]))