Advanced example: define advanced metrics#

[1]:
%%javascript
// leave this in to disable autoscroll in Jupyter notebook
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

Imports#

[2]:
import fcpy

Load data#

[3]:
# the `load_dataset' option is currently unavalable due to data handling system move.
# ds = fcpy.load_dataset(model='atmospheric-model', var_names=['2t', 'q', 'sst'], levels=list(range(1, 10)))

# The `data` folder contains two sample NetCDF files.
# Here we load specific humidity from CAMS at 32 bits
fpath = "../data/cams_q_20191201_v3.nc"

ds = fcpy.open_dataset(fpath)
ds = ds[["q"]]  # Select q only as this dataset contains more vars...
ds
[3]:
<xarray.Dataset>
Dimensions:  (time: 1, lev: 137, lat: 451, lon: 900)
Coordinates:
  * time     (time) datetime64[ns] 2019-12-01T12:00:00
  * lon      (lon) float64 0.0 0.4 0.8 1.2 1.6 ... 358.0 358.4 358.8 359.2 359.6
  * lat      (lat) float64 -90.0 -89.6 -89.2 -88.8 -88.4 ... 88.8 89.2 89.6 90.0
  * lev      (lev) float64 1.0 2.0 3.0 4.0 5.0 ... 133.0 134.0 135.0 136.0 137.0
Data variables:
    q        (time, lev, lat, lon) float32 dask.array<chunksize=(1, 137, 451, 900), meta=np.ndarray>
Attributes:
    CDI:          Climate Data Interface version 1.9.8 (https://mpimet.mpg.de...
    Conventions:  CF-1.6
    history:      Mon Feb 15 18:37:28 2021: cdo -f nc4 copy tmp.grib data/mil...
    institution:  European Centre for Medium-Range Weather Forecasts
    CDO:          Climate Data Operators version 1.9.8 (https://mpimet.mpg.de...

Define own metric#

[4]:
# Advance use: define your own metric.
# Here we count the number of unique values per compressor type
# without relying on auto-chunking or precomputed histograms
# this gives full flexibility but with greater complexity
def unique_count(chunks, baseline, compressors, bits):
    from fcpy import run_compressor_single
    import xarray as xr
    from collections import defaultdict

    # Unique values of decompressed dataset
    # ... over all compressors and bits
    unique = defaultdict(set)
    for chunk in chunks:
        for compressor in compressors:
            for bits_ in bits:
                da_decompressed = run_compressor_single(chunk, compressor, bits_)
                unique[(compressor.name, bits_)] |= set(
                    da_decompressed.values.flatten()
                )

    counts = xr.DataArray(
        0,
        dims=["compressor", "bits"],
        coords={"compressor": [c.name for c in compressors], "bits": bits},
    )
    for compressor in compressors:
        for bits_ in bits:
            counts.loc[dict(compressor=compressor.name, bits=bits_)] = len(
                unique[(compressor.name, bits_)]
            )
    return counts

Define and run experiment#

[5]:
suite = fcpy.Suite(
    ds,
    baseline=fcpy.Float(bits=32),
    compressors=[fcpy.LinQuantization(), fcpy.Round()],
    metrics=[fcpy.Difference, fcpy.AbsoluteError],
    custom_metrics=[unique_count],
    bits=[12, 14, 16, 18],
    max_chunk_size_bytes=451 * 900 * 4,
    skip_histograms=True,
)
  Activating project at `~/work/field-compression/field-compression`
WARNING: method definition for == at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:68 declares type variable T but does not use it.
WARNING: method definition for getindex at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:120 declares type variable T but does not use it.
WARNING: method definition for getindex at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:120 declares type variable P but does not use it.
WARNING: method definition for canonicalize at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:240 declares type variable L but does not use it.
WARNING: method definition for canonicalize at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:241 declares type variable L but does not use it.

Plot results#

[6]:
# As data are in xarray, custom plots and comparisons are easy!
# Verbosity here is to showcase full customization
import matplotlib.pyplot as plt

ds_unique_count = suite.custom_metrics[0]
ds_unique_count.q.plot.line(x="bits")
plt.title(f"{ds.q.long_name} in {ds.q.units}")
plt.xlabel("Bits")
plt.ylabel("Number of Unique Values");
../_images/_notebooks_examples-advanced-user-defined-metrics_11_0.png
[ ]: