# Source code for src.toolbox.steps.custom.qc.gross_range_qc

"""Gross Range Test QC Step."""

#### Mandatory imports ####
import numpy as np
from toolbox.steps.base_qc import BaseQC, register_qc, flag_cols

#### Custom imports ####
import matplotlib.pyplot as plt
import xarray as xr
import matplotlib


# TODO: Could be registered within range_test.py
@register_qc
class gross_range_qc(BaseQC):
    """
    Outside range test similar to IOOS QC gross range test. Not to be confused with `range test`, which flags within a range.

    For every configured variable, each flag is paired with a ``[low, high]`` range.
    Data points strictly below ``low`` or strictly above ``high`` receive that flag.
    When a point falls outside several ranges, the numerically highest flag wins.
    Points that are outside none of the ranges are flagged 1.

    The `variable_ranges` parameter is required for this test, but `also_flag` is not.
    `also_flag` copies the flags computed for one variable onto the ``<name>_QC``
    columns of other variables. If a ``<name>_QC`` column is written more than once in
    a single run (a variable listed under several `also_flag` entries, or one that is
    both tested and listed under `also_flag`), the numerically highest flag is kept
    for each point.

    NOTE: NaN compares False against both bounds, so NaN values are not treated as
    "outside" and end up flagged 1.

    Target Variable: Any
    Flag Number: Any
    Variables Flagged: Any

    EXAMPLE
    -------
    gross range qc:
        variable_ranges:
            TEMP:
                3: [0, 30]      #   Flags temperature data outside of this range as probably bad (3)
                4: [-2.5, 40]   #   Flags temperature data outside of this range as bad (4)
            CNDC:
                3: [5, 42]
                4: [2, 45]
        also_flag:
            TEMP: [DOXY] #  Flag DOXY based on TEMP flags
        plot: [TEMP]     #  Optional: variables to show in plot_diagnostics
    """

    # Name of this test as used in the configuration (see EXAMPLE above).
    qc_name = "gross range qc"

    # NOTE(review): not read anywhere in this class; presumably consumed by the
    # QC framework (BaseQC / register_qc) - confirm its meaning there.
    dynamic = True

    def __init__(self, data, **kwargs):
        """
        Store the test configuration and take a private copy of the data.

        Parameters
        ----------
        data : xarray.Dataset or None
            Dataset to test. A deep copy is stored on ``self.data``; when None,
            ``self.data`` is not set by this constructor.
        **kwargs
            variable_ranges : dict, required
                ``{variable: {flag: [low, high]}}``.
            also_flag : dict, optional
                ``{tested_variable: [other variables that receive the same flags]}``.
                May be omitted or None. A single name is accepted in place of a list.
            plot : list, optional
                Variables to draw in `plot_diagnostics`. May be omitted or None.
                A single name is accepted in place of a list.

        Raises
        ------
        KeyError
            If `variable_ranges` is not supplied.
        """
        # also_flag is deliberately not required so the test can run independently.
        required_kwargs = {"variable_ranges"}
        missing = required_kwargs - set(kwargs)
        if missing:
            raise KeyError(f"{missing} missing from gross range test")

        self.variable_ranges = kwargs["variable_ranges"]

        # A blank YAML key arrives as None; treat it the same as an absent key.
        also_flag = kwargs.get("also_flag") or {}
        self.also_flag = {
            var: self._as_list(extras) for var, extras in also_flag.items()
        }

        # Plotting is optional; tolerate a blank key or a single name as well.
        self.plot = self._as_list(kwargs.get("plot"))

        self.required_variables = list(self.variable_ranges.keys())
        self.tested_variables = self.required_variables.copy()

        # Every QC column this test can write: the tested variables plus all
        # variables named under also_flag (de-duplicated, order not significant).
        self.qc_outputs = list(
            {f"{v}_QC" for v in self.tested_variables}
            | {f"{v}_QC" for extras in self.also_flag.values() for v in extras}
        )

        if data is not None:
            self.data = data.copy(deep=True)

    @staticmethod
    def _as_list(value):
        """Normalise an optional config entry (None, one name, or a sequence) to a list."""
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    def return_qc(self):
        """
        Select data outside of the ranges and flag accordingly.

        Returns
        -------
        xarray.Dataset
            The ``*_QC`` variables only; also stored on ``self.flags``.
            As a side effect ``self.data`` is reduced to the tested variables
            plus the ``*_QC`` columns written here.
        """
        # Subset the data
        self.data = self.data[self.required_variables]

        # QC columns written during this call, so a second write merges
        # with the first instead of replacing it.
        written = set()

        def store(column, flags):
            if column in written:
                previous = self.data[column]
                # Keep the numerically highest flag per point.
                flags = xr.where(flags > previous, flags, previous)
            self.data[column] = flags
            written.add(column)

        for var in self.tested_variables:
            values = self.data[var]
            ranges = self.variable_ranges[var]

            # 0 is used as the "not yet flagged" marker while ranges are applied.
            qc = xr.zeros_like(values, dtype=int)

            # Apply flags from the highest flag number down; a point keeps the
            # first flag it receives, so the highest matching flag wins.
            for flag in sorted(ranges, reverse=True):
                low, high = ranges[flag]
                outside = (values < low) | (values > high)
                qc = xr.where((qc == 0) & outside, flag, qc)

            # Anything not flagged is good
            qc = xr.where(qc == 0, 1, qc)

            store(f"{var}_QC", qc)

            # Propagate flags
            for extra_var in self.also_flag.get(var, []):
                store(f"{extra_var}_QC", qc)

        # Select just the flags
        self.flags = self.data[[v for v in self.data.data_vars if v.endswith("_QC")]]

        return self.flags

    def plot_diagnostics(self):
        """
        Visualise the QC results in a similar manner to range_test.

        Draws one subplot per entry of ``self.plot``: the variable against
        ``N_MEASUREMENTS``, coloured by flag, with every configured bound shown as a
        dashed horizontal line. Entries that cannot be plotted are skipped with a
        warning and leave their subplot empty. Blocks until the window is closed.
        """
        # If no plots were specified
        if len(self.plot) == 0:
            self.log_warn(
                f"WARNING: In '{self.qc_name}' diagnostics were called but no plots were specified."
            )
            return

        # Only switch backend once we know a figure will actually be drawn.
        matplotlib.use("tkagg")

        # Plot the QC output
        fig, axs = plt.subplots(nrows=len(self.plot), figsize=(8, 6), dpi=200)
        if len(self.plot) == 1:
            # plt.subplots returns a bare Axes for a single row
            axs = [axs]

        for ax, var in zip(axs, self.plot):
            # Check that the user specified var exists in the test set
            if f"{var}_QC" not in self.qc_outputs:
                self.log_warn(
                    f"WARNING: Cannot plot {var}_QC as it was not included in this test."
                )
                continue

            # Variables flagged only through also_flag have no configured ranges, and
            # their data is dropped by return_qc, so there is nothing to draw.
            if var not in self.variable_ranges:
                self.log_warn(
                    f"WARNING: Cannot plot {var} as it was only flagged via also_flag "
                    "and has no range in this test."
                )
                continue

            for i in range(10):
                # Plot by flag number
                plot_data = self.data[[var, "N_MEASUREMENTS"]].where(
                    self.data[f"{var}_QC"] == i, drop=True
                )

                if len(plot_data[var]) == 0:
                    continue

                # Plot the data
                ax.plot(
                    plot_data["N_MEASUREMENTS"],
                    plot_data[var],
                    c=flag_cols[i],
                    ls="",
                    marker="o",
                    label=f"{i}",
                )

            # Mark every configured low/high bound
            for bounds in self.variable_ranges[var].values():
                for bound in bounds:
                    ax.axhline(bound, ls="--", c="k")

            ax.set(
                xlabel="Index",
                ylabel=var,
                title=f"{var} Range Test",
            )

            ax.legend(title="Flags", loc="upper right")

        fig.tight_layout()
        plt.show(block=True)