Source code for src.toolbox.steps.custom.qc.valid_profile_qc

# This file is part of the NOC Autonomy Toolbox.
#
# Copyright 2025-2026 National Oceanography Centre and The Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""QC tests for assessing validity of a glider profile, based on different definitions of successful data."""

#### Mandatory imports ####
from toolbox.steps.base_qc import BaseQC, register_qc, flag_cols

#### Custom imports ####
import matplotlib.pyplot as plt
import polars as pl
import xarray as xr
import matplotlib


@register_qc
[docs] class valid_profile_qc(BaseQC): """ Target Variable: PROFILE_NUMBER Flag Number: 4 (bad data), 3 (potentially bad) Variables Flagged: PROFILE_NUMBER Checks that each profile is of a certain length (in number of points) and contains points within a specified depth range. """
[docs] qc_name = "valid profile qc"
[docs] expected_parameters = { "profile_length": 1000, "depth_range": (-1000, 0), }
[docs] required_variables = ["PROFILE_NUMBER", "DEPTH"]
[docs] qc_outputs = ["PROFILE_NUMBER"]
[docs] def return_qc(self): # Convert to polars self.df = pl.from_pandas( self.data[self.required_variables].to_dataframe(), nan_to_null=False ) # Check profiles are of a given length profile_lengths = self.df.group_by("PROFILE_NUMBER").count() self.df = self.df.join(profile_lengths, on="PROFILE_NUMBER", how="left") # Find profiles that have no data between the sepcified depth ranges profile_ranges = self.df.group_by("PROFILE_NUMBER").agg( (pl.col("DEPTH").is_between(*self.depth_range).any()).alias( "in_depth_range" ) ) self.df = self.df.join(profile_ranges, on="PROFILE_NUMBER", how="left") self.df = self.df.with_columns( pl.when(pl.col("PROFILE_NUMBER").is_nan()) .then(9) .when(pl.col("count") < self.profile_length) .then(4) .when(pl.col("in_depth_range").not_()) .then(3) .otherwise(1) .alias("PROFILE_NUMBER_QC") ) # Convert back to xarray flags = self.df.select(pl.col("^.*_QC$")) self.flags = xr.Dataset( data_vars={ col: ("N_MEASUREMENTS", flags[col].to_numpy()) for col in flags.columns }, coords={"N_MEASUREMENTS": self.data["N_MEASUREMENTS"]}, ) return self.flags
[docs] def plot_diagnostics(self): matplotlib.use("tkagg") fig, ax = plt.subplots(figsize=(8, 6), dpi=200) for i in range(10): # Plot by flag number plot_data = self.df.with_row_index().filter( pl.col("PROFILE_NUMBER_QC") == i ) if len(plot_data) == 0: continue # Plot the data ax.plot( plot_data["index"], plot_data["DEPTH"], c=flag_cols[i], ls="", marker="o", label=f"{i}", ) ax.set( xlabel="Index", ylabel="Pressure", title="Valid Profile Test", ) ax.legend(title="Flags", loc="upper right") fig.tight_layout() plt.show(block=True)