Source code for src.toolbox.steps.custom.qc.gross_range_qc

"""Gross Range Test QC Step."""

#### Mandatory imports ####
import numpy as np
from toolbox.steps.base_qc import BaseQC, register_qc, flag_cols

#### Custom imports ####
import matplotlib.pyplot as plt
import xarray as xr
import matplotlib


# TODO: Could be registered within range_test.py
@register_qc
class gross_range_qc(BaseQC):
    """
    Outside range test similar to IOOS QC gross range test.

    Not to be confused with `range test`, which flags within a range.
    Given two values it checks for data points outside of this range and
    assigns a corresponding flag as defined in the configuration.
    The `variable_ranges` parameter is required for this test, but
    `also_flag` is not.

    Target Variable: Any
    Flag Number: Any
    Variables Flagged: Any

    EXAMPLE
    -------
    gross range qc:
      variable_ranges:
        TEMP:
          3: [0, 30]     # Flags temperature data outside of this range as probably bad (3)
          4: [-2.5, 40]  # Flags temperature data outside of this range as bad (4)
        CNDC:
          3: [5, 42]
          4: [2, 45]
      also_flag:
        TEMP: [DOXY]     # Flag DOXY based on TEMP flags
    """

    qc_name = "gross range qc"
    dynamic = True

    def __init__(self, data, **kwargs):
        """Store the test configuration and take a deep copy of the data.

        Parameters
        ----------
        data
            Dataset to test, or None. When not None it is deep-copied onto
            ``self.data`` so the caller's object is never modified.
        **kwargs
            variable_ranges : dict (required)
                ``{variable: {flag: [low, high]}}``.
            also_flag : dict, optional
                ``{tested variable: [other variables that receive its flags]}``.
                A missing or empty/None value means "propagate nothing".
            plot : list, optional
                Variable names to draw in ``plot_diagnostics``. A missing or
                empty/None value means "plot nothing".

        Raises
        ------
        KeyError
            If ``variable_ranges`` is not supplied.
        """
        # also_flag is deliberately not required, so the test can be run
        # independently.
        required_kwargs = {"variable_ranges"}
        if not required_kwargs.issubset(kwargs):
            raise KeyError(
                f"{required_kwargs - set(kwargs)} missing from gross range test"
            )

        self.variable_ranges = kwargs["variable_ranges"]

        # Both optional parameters may be absent or left blank (None) in the
        # configuration; normalise them to empty containers.
        self.also_flag = kwargs.get("also_flag") or {}
        self.plot = kwargs.get("plot") or []

        self.required_variables = list(self.variable_ranges.keys())
        self.tested_variables = self.required_variables.copy()

        # De-duplicate while keeping a stable order (tested variables first,
        # then the variables that only receive propagated flags). A plain set
        # would make the order change from run to run.
        propagated = [
            target for targets in self.also_flag.values() for target in targets
        ]
        self.qc_outputs = list(
            dict.fromkeys(f"{v}_QC" for v in self.tested_variables + propagated)
        )

        if data is not None:
            self.data = data.copy(deep=True)

    def return_qc(self):
        """Select data outside of the ranges and flag accordingly.

        ``self.data`` is reduced to the tested variables, a ``<var>_QC``
        variable is added for each of them (and for every variable named in
        ``also_flag``), and the ``*_QC`` variables are returned and stored on
        ``self.flags``.
        """
        # Subset the data
        self.data = self.data[self.required_variables]

        for var in self.tested_variables:
            # 0 is used as the "not yet flagged" sentinel.
            qc = xr.zeros_like(self.data[var], dtype=int)

            # Apply flags from most severe to least; the `qc == 0` guard stops
            # a less severe flag from overwriting a more severe one.
            for flag in sorted(self.variable_ranges[var], reverse=True):
                low, high = self.variable_ranges[var][flag]
                outside = (self.data[var] < low) | (self.data[var] > high)
                qc = xr.where((qc == 0) & outside, flag, qc)

            # Anything not flagged is good
            qc = xr.where(qc == 0, 1, qc)
            self.data[f"{var}_QC"] = qc

            # Propagate flags
            # NOTE(review): this assignment overwrites any existing
            # `<extra_var>_QC`, so if a variable is both tested and named in
            # also_flag the last writer wins - confirm that is intended.
            for extra_var in self.also_flag.get(var, []):
                self.data[f"{extra_var}_QC"] = qc

        # Select just the flags
        self.flags = self.data[[v for v in self.data.data_vars if v.endswith("_QC")]]
        return self.flags

    def plot_diagnostics(self):
        """Visualise the QC results in a similar manner to range_test.

        One subplot is drawn per entry of ``self.plot``: the data are plotted
        against ``N_MEASUREMENTS`` coloured by flag, with every configured
        bound drawn as a dashed horizontal line. Entries that cannot be
        plotted are skipped with a warning.
        """
        matplotlib.use("tkagg")

        # If no plots were specified
        if not self.plot:
            self.log_warn(
                "WARNING: In 'range test gross' diagnostics were called but no plots were specified."
            )
            return

        # Plot the QC output
        fig, axs = plt.subplots(nrows=len(self.plot), figsize=(8, 6), dpi=200)
        if len(self.plot) == 1:
            # plt.subplots returns a bare Axes (not a sequence) for one row.
            axs = [axs]

        for ax, var in zip(axs, self.plot):
            # Check that the user specified var exists in the test set
            if f"{var}_QC" not in self.qc_outputs:
                self.log_warn(
                    f"WARNING: Cannot plot {var}_QC as it was not included in this test."
                )
                continue

            # Variables that only receive propagated flags (also_flag) have no
            # ranges of their own and their data are dropped by return_qc, so
            # there is nothing to draw for them.
            if var not in self.variable_ranges:
                self.log_warn(
                    f"WARNING: Cannot plot {var} as no ranges were configured for it in this test."
                )
                continue

            for i in range(10):
                # Plot by flag number
                plot_data = self.data[[var, "N_MEASUREMENTS"]].where(
                    self.data[f"{var}_QC"] == i, drop=True
                )
                if len(plot_data[var]) == 0:
                    continue

                # Plot the data
                ax.plot(
                    plot_data["N_MEASUREMENTS"],
                    plot_data[var],
                    c=flag_cols[i],
                    ls="",
                    marker="o",
                    label=f"{i}",
                )

            # Draw every configured lower/upper bound.
            for bounds in self.variable_ranges[var].values():
                for bound in bounds:
                    ax.axhline(bound, ls="--", c="k")

            ax.set(
                xlabel="Index",
                ylabel=var,
                title=f"{var} Range Test",
            )
            ax.legend(title="Flags", loc="upper right")

        fig.tight_layout()
        plt.show(block=True)