# Source code for src.toolbox.steps.custom.qc.impossible_date_qc

# This file is part of the NOC Autonomy Toolbox.
#
# Copyright 2025-2026 National Oceanography Centre and The Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""QC test to identify impossible dates in TIME variable."""

#### Mandatory imports ####
from toolbox.steps.base_qc import BaseQC, register_qc, flag_cols

#### Custom imports ####
import polars as pl
import xarray as xr
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt


@register_qc
class impossible_date_qc(BaseQC):
    """
    Target Variable: TIME
    Flag Number: 4 (bad data)
    Variables Flagged: TIME
    Checks that the datetime of each point is valid.

    A timestamp is treated as valid when it lies strictly between
    1985-01-01 and the moment the test is run. Values written to TIME_QC:

    * 1 - timestamp lies inside the valid window
    * 4 - timestamp lies outside the valid window
    * 9 - timestamp is missing (null)
    """

    qc_name = "impossible date qc"

    expected_parameters = {}

    required_variables = ["TIME"]

    qc_outputs = ["TIME_QC"]

    def return_qc(self):
        """Flag every TIME value and return the flags as an xarray Dataset.

        Side effects: stores the working polars frame (TIME plus TIME_QC) on
        ``self.df`` and the resulting flag dataset on ``self.flags``.

        Returns
        -------
        xarray.Dataset
            One variable per ``*_QC`` column (here ``TIME_QC``), laid out on
            the ``N_MEASUREMENTS`` dimension of ``self.data``.
        """
        # Local import: ``timezone`` is only needed to build the upper bound.
        from datetime import timezone

        # Convert to polars
        self.df = pl.from_pandas(
            self.data[self.required_variables].to_dataframe(), nan_to_null=False
        )

        # Check if any of the datetime stamps fall outside 1985 and the current datetime
        # TODO: Add optional bounds via parameters (such as known deployment dates, for example)
        earliest = datetime(1985, 1, 1)
        # TIME is compared against timezone-naive values, so the upper bound
        # has to be naive as well. Take "now" in UTC rather than the host's
        # local zone so the result does not depend on where the code runs.
        # NOTE(review): assumes TIME is stored as UTC - confirm against the
        # data specification.
        latest = datetime.now(timezone.utc).replace(tzinfo=None)

        time_col = pl.col("TIME")
        self.df = self.df.with_columns(
            pl.when(time_col.is_null())
            .then(9)
            .when((time_col > earliest) & (time_col < latest))
            .then(1)
            .otherwise(4)
            .alias("TIME_QC")
        )

        # Convert back to xarray
        flags = self.df.select(pl.col("^.*_QC$"))
        self.flags = xr.Dataset(
            data_vars={
                col: ("N_MEASUREMENTS", flags[col].to_numpy()) for col in flags.columns
            },
            coords={"N_MEASUREMENTS": self.data["N_MEASUREMENTS"]},
        )

        return self.flags

    def plot_diagnostics(self):
        """Show TIME against row index, one colour per flag value.

        Reads ``self.df``, which ``return_qc`` populates, and blocks until the
        plot window is closed.
        """
        # Selects the Tk backend before any figure is created.
        # NOTE(review): presumably needs a display and a Tk install - confirm
        # this is never called in headless runs.
        matplotlib.use("tkagg")
        fig, ax = plt.subplots(figsize=(6, 4), dpi=200)

        # The row index does not depend on the flag, so attach it once
        # instead of on every pass through the loop.
        indexed = self.df.with_row_index()
        for i in range(10):
            # Plot by flag number
            plot_data = indexed.filter(pl.col("TIME_QC") == i)
            if plot_data.is_empty():
                continue

            # Plot the data
            ax.plot(
                plot_data["index"],
                plot_data["TIME"],
                c=flag_cols[i],
                ls="",
                marker="o",
                label=f"{i}",
            )
        ax.set(
            title="Impossible Date Test",
            xlabel="Index",
            ylabel="TIME",
        )
        ax.legend(title="Flags", loc="upper right")
        fig.tight_layout()
        plt.show(block=True)