# Advanced example: define advanced metrics

In [None]:
%%javascript
// Leave this in to disable autoscroll in Jupyter notebook.
// It replaces the output area's `_should_scroll` check with a function that
// always returns false, regardless of how many lines the output has.
// NOTE(review): `IPython.OutputArea` looks like a classic-notebook global and
// `_should_scroll` a private hook — confirm this cell is still needed (and
// does not raise) in the front end this notebook is meant to run in.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

## Imports

In [None]:
import fcpy

## Load data

In [None]:
# NOTE: the `load_dataset` option is currently unavailable because of the
# data handling system move.
# ds = fcpy.load_dataset(model='atmospheric-model', var_names=['2t', 'q', 'sst'], levels=list(range(1, 10)))

# Two sample NetCDF files ship in the `data` folder.
# This one holds specific humidity from CAMS at 32 bits.
fpath = "../data/cams_q_20191201_v3.nc"

# Open the file and keep only `q`, since this dataset contains more variables.
ds = fcpy.open_dataset(fpath)[["q"]]
ds

## Define own metric

In [None]:
# Advanced use: define your own metric.
# Here we count the number of unique values per compressor type
# without relying on auto-chunking or precomputed histograms.
# This gives full flexibility but with greater complexity.
def unique_count(chunks, baseline, compressors, bits):
    """Count distinct decompressed values for every (compressor, bits) pair.

    Each chunk is run through ``run_compressor_single`` for every compressor
    and bit width; the distinct values seen are accumulated across all chunks.

    Parameters
    ----------
    chunks : iterable
        Data chunks; each one is passed as-is to ``run_compressor_single``.
    baseline
        Unused here. Presumably part of the custom-metric signature that
        ``fcpy.Suite`` calls with — TODO confirm.
    compressors : sequence
        Compressor objects exposing a ``name`` attribute. Iterated more than
        once, so it must not be a one-shot iterator.
    bits : sequence
        Bit widths to evaluate. Also iterated more than once.

    Returns
    -------
    xarray.DataArray
        Integer array with dims ``("compressor", "bits")`` holding the number
        of distinct values. NaN, if present, counts as one distinct value
        rather than once per occurrence.
    """
    # Imports stay local to the function, as in the original notebook.
    from fcpy import run_compressor_single
    import numpy as np
    import xarray as xr
    from collections import defaultdict

    # Unique non-NaN values of the decompressed dataset
    # ... over all compressors and bits
    unique = defaultdict(set)
    # NaN != NaN, so NaNs can never be deduplicated by a set; track them apart.
    has_nan = defaultdict(bool)
    for chunk in chunks:
        for compressor in compressors:
            for bits_ in bits:
                da_decompressed = run_compressor_single(chunk, compressor, bits_)
                key = (compressor.name, bits_)
                # Deduplicate inside NumPy first: avoids creating one Python
                # object per array element before building the set.
                chunk_unique = np.unique(da_decompressed.values)
                # `x != x` is True only for NaN and works for any numeric dtype.
                nan_mask = chunk_unique != chunk_unique
                if nan_mask.any():
                    has_nan[key] = True
                    chunk_unique = chunk_unique[~nan_mask]
                unique[key].update(chunk_unique.tolist())

    counts = xr.DataArray(
        0,
        dims=["compressor", "bits"],
        coords={"compressor": [c.name for c in compressors], "bits": bits},
    )
    for compressor in compressors:
        for bits_ in bits:
            key = (compressor.name, bits_)
            counts.loc[dict(compressor=compressor.name, bits=bits_)] = len(
                unique[key]
            ) + int(has_nan[key])
    return counts

## Define and run experiment

In [None]:
# Assemble the experiment on `ds`: two compressors are compared against a
# 32-bit float baseline at four bit widths, using two metrics provided by
# fcpy plus the custom `unique_count` metric.
suite = fcpy.Suite(
    ds,
    baseline=fcpy.Float(bits=32),
    compressors=[fcpy.LinQuantization(), fcpy.Round()],
    metrics=[fcpy.Difference, fcpy.AbsoluteError],
    custom_metrics=[unique_count],
    bits=[12, 14, 16, 18],
    # presumably one 451 x 900 field of 4-byte (32-bit) values per chunk —
    # TODO confirm against the dataset's grid
    max_chunk_size_bytes=451 * 900 * 4,
    # presumably skips histogram precomputation, which the custom metric
    # does not rely on — confirm against the fcpy.Suite documentation
    skip_histograms=True,
)

## Plot results

In [None]:
# Results are xarray objects, so custom plots and comparisons are easy.
# Every step is spelled out here to showcase full customization.
import matplotlib.pyplot as plt

# First custom-metric result (presumably the `unique_count` output, as it is
# the only custom metric registered on the suite).
ds_unique_count = suite.custom_metrics[0]
ds_unique_count.q.plot.line(x="bits")

# Override the title and axis labels on the axes that were just drawn.
plt.gca().set(
    title=f"{ds.q.long_name} in {ds.q.units}",
    xlabel="Bits",
    ylabel="Number of Unique Values",
);