# Source code for src.toolbox.steps.custom.qc.impossible_date_qc

# This file is part of the NOC Autonomy Toolbox.
#
# Copyright 2025-2026 National Oceanography Centre and The Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""QC test to identify impossible dates in TIME variable."""

#### Mandatory imports ####
from toolbox.steps.base_qc import BaseQC, register_qc, flag_cols

#### Custom imports ####
import polars as pl
import xarray as xr
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt


@register_qc
class impossible_date_qc(BaseQC):
    """
    Target Variable: TIME
    Flag Number: 4 (bad data)
    Variables Flagged: TIME

    Checks that the datetime of each point is valid.

    Every TIME value receives exactly one flag in ``TIME_QC``:

    * 9 when the timestamp is null,
    * 1 when it lies strictly between 1985-01-01 and the moment the test runs,
    * 4 in every other case.
    """

    # Identifier of this test (presumably used by ``register_qc`` for lookup;
    # the registry itself is not visible in this file).
    qc_name = "impossible date qc"
    # This test takes no configurable parameters.
    expected_parameters = {}
    # Variables read from ``self.data``.
    required_variables = ["TIME"]
    # Flag variables written into the returned dataset.
    qc_outputs = ["TIME_QC"]

    def return_qc(self):
        """
        Build the ``TIME_QC`` flags for the loaded data.

        Reads ``self.data`` (assumed to be an xarray Dataset with an
        ``N_MEASUREMENTS`` coordinate; it is populated outside this class),
        stores the working polars frame on ``self.df`` and the result on
        ``self.flags``.

        Returns
        -------
        xarray.Dataset
            One variable per ``*_QC`` column of the working frame, laid out
            along ``N_MEASUREMENTS``.
        """
        # xarray -> pandas -> polars
        pandas_frame = self.data[self.required_variables].to_dataframe()
        self.df = pl.from_pandas(pandas_frame, nan_to_null=False)

        # Accepted window: after 1985 and before the current datetime.
        # TODO: Add optional bounds via parameters (such as known deployment dates, for example)
        # NOTE(review): datetime.now() is naive local time - confirm that TIME
        # uses the same reference.
        time_col = pl.col("TIME")
        earliest = datetime(1985, 1, 1)
        latest = datetime.now()
        inside_window = (time_col > earliest) & (time_col < latest)

        # The null check comes first so missing stamps never fall through to
        # the "bad data" branch.
        time_flag = (
            pl.when(time_col.is_null())
            .then(9)
            .when(inside_window)
            .then(1)
            .otherwise(4)
            .alias("TIME_QC")
        )
        self.df = self.df.with_columns(time_flag)

        # polars -> xarray, keeping only the flag columns
        qc_frame = self.df.select(pl.col("^.*_QC$"))
        flag_vars = {}
        for column_name in qc_frame.columns:
            flag_vars[column_name] = (
                "N_MEASUREMENTS",
                qc_frame[column_name].to_numpy(),
            )

        self.flags = xr.Dataset(
            data_vars=flag_vars,
            coords={"N_MEASUREMENTS": self.data["N_MEASUREMENTS"]},
        )
        return self.flags

    def plot_diagnostics(self):
        """
        Show TIME against row index, coloured by QC flag.

        Relies on ``self.df`` holding a ``TIME_QC`` column, which
        ``return_qc`` creates. Switches matplotlib to the ``tkagg`` backend
        and blocks until the figure window is closed.
        """
        matplotlib.use("tkagg")
        fig, ax = plt.subplots(figsize=(6, 4), dpi=200)

        # The row index does not depend on the flag, so attach it once.
        indexed = self.df.with_row_index()

        for i in range(10):
            # One series per flag value; flags with no points are left out
            # so they do not appear in the legend.
            subset = indexed.filter(pl.col("TIME_QC") == i)
            if len(subset) > 0:
                ax.plot(
                    subset["index"],
                    subset["TIME"],
                    c=flag_cols[i],
                    ls="",
                    marker="o",
                    label=f"{i}",
                )

        ax.set(
            title="Impossible Date Test",
            xlabel="Index",
            ylabel="TIME",
        )
        ax.legend(title="Flags", loc="upper right")

        fig.tight_layout()
        plt.show(block=True)