Source code for src.toolbox.utils.processing_utils
# This file is part of the NOC Autonomy Toolbox.
#
# Copyright 2025-2026 National Oceanography Centre and The Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
# ----------------------------- NaN Handling ------------------------------
[docs]
def find_nans(data: np.ndarray):
"""
Handles generation of masks and location indices of nans.
Intended for 1D arrays.
Parameters
----------
data : np.ndarray
numpy array with nans
Returns
-------
nan_mask : np.ndarray
mask where incices with nans are True
nan_indices : np.ndarray
indices of locations where there are nans
non_nan_indices : np.ndarray
indices of locations where there are values
"""
nan_mask = np.isnan(data)
nan_indices = np.nonzero(nan_mask)[0]
non_nan_indices = np.nonzero(~nan_mask)[0]
return nan_mask, nan_indices, non_nan_indices
[docs]
def interpolate_nans(data, coords):
"""
Fills nan values in y using interpolation over x.
x and y must have the same dimensions.
Parameters
----------
data : np.ndarray
1D array of size N to interpolate
coords : np.ndarray
1D array of size N which the data will be interpolated over
Returns
-------
filled_data : np.ndarray
data with nans filled using linear interpolation
"""
# Convert datetimes to floats
args = [np.array(data), np.array(coords)]
for i, array in enumerate(args):
if np.issubdtype(array.dtype, np.datetime64):
elapsed_time = (args[i] - args[i][0]) / np.timedelta64(1, "s")
args[i] = elapsed_time
data, coords = args
non_nan_mask = ~np.isnan(data)
filled_data = np.interp(
coords,
coords[non_nan_mask],
data[non_nan_mask]
)
return filled_data
# ----------------------------- Filtering ---------------------------------
[docs]
def remove_outliers(data):
"""
Removes outliers (including NaNs) from data. Exclusion is based on
inter-quartile range.
Parameters
----------
data : np.ndarray | list
1D array of size N to interpolate
Returns
-------
filtered_data : np.ndarray
data with outliers removed
"""
data = np.array(data)
data = data[np.isfinite(data)] # remove NaNs
if len(data) == 0:
return data
q1, q3 = np.percentile(data, [25, 75])
iqr = q3 - q1
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr
filtered_data = data[(data >= lower) & (data <= upper)]
return filtered_data