Advanced example: define advanced metrics#
[1]:
%%javascript
// leave this in to disable autoscroll in Jupyter notebook
IPython.OutputArea.prototype._should_scroll = function(lines) {
return false;
}
Imports#
[2]:
import fcpy
Load data#
[3]:
# the `load_dataset' option is currently unavalable due to data handling system move.
# ds = fcpy.load_dataset(model='atmospheric-model', var_names=['2t', 'q', 'sst'], levels=list(range(1, 10)))
# The `data` folder contains two sample NetCDF files.
# Here we load specific humidity from CAMS at 32 bits
fpath = "../data/cams_q_20191201_v3.nc"
ds = fcpy.open_dataset(fpath)
ds = ds[["q"]] # Select q only as this dataset contains more vars...
ds
[3]:
<xarray.Dataset> Dimensions: (time: 1, lev: 137, lat: 451, lon: 900) Coordinates: * time (time) datetime64[ns] 2019-12-01T12:00:00 * lon (lon) float64 0.0 0.4 0.8 1.2 1.6 ... 358.0 358.4 358.8 359.2 359.6 * lat (lat) float64 -90.0 -89.6 -89.2 -88.8 -88.4 ... 88.8 89.2 89.6 90.0 * lev (lev) float64 1.0 2.0 3.0 4.0 5.0 ... 133.0 134.0 135.0 136.0 137.0 Data variables: q (time, lev, lat, lon) float32 dask.array<chunksize=(1, 137, 451, 900), meta=np.ndarray> Attributes: CDI: Climate Data Interface version 1.9.8 (https://mpimet.mpg.de... Conventions: CF-1.6 history: Mon Feb 15 18:37:28 2021: cdo -f nc4 copy tmp.grib data/mil... institution: European Centre for Medium-Range Weather Forecasts CDO: Climate Data Operators version 1.9.8 (https://mpimet.mpg.de...
Define own metric#
[4]:
# Advance use: define your own metric.
# Here we count the number of unique values per compressor type
# without relying on auto-chunking or precomputed histograms
# this gives full flexibility but with greater complexity
def unique_count(chunks, baseline, compressors, bits):
from fcpy import run_compressor_single
import xarray as xr
from collections import defaultdict
# Unique values of decompressed dataset
# ... over all compressors and bits
unique = defaultdict(set)
for chunk in chunks:
for compressor in compressors:
for bits_ in bits:
da_decompressed = run_compressor_single(chunk, compressor, bits_)
unique[(compressor.name, bits_)] |= set(
da_decompressed.values.flatten()
)
counts = xr.DataArray(
0,
dims=["compressor", "bits"],
coords={"compressor": [c.name for c in compressors], "bits": bits},
)
for compressor in compressors:
for bits_ in bits:
counts.loc[dict(compressor=compressor.name, bits=bits_)] = len(
unique[(compressor.name, bits_)]
)
return counts
Define and run experiment#
[5]:
suite = fcpy.Suite(
ds,
baseline=fcpy.Float(bits=32),
compressors=[fcpy.LinQuantization(), fcpy.Round()],
metrics=[fcpy.Difference, fcpy.AbsoluteError],
custom_metrics=[unique_count],
bits=[12, 14, 16, 18],
max_chunk_size_bytes=451 * 900 * 4,
skip_histograms=True,
)
Activating project at `~/work/field-compression/field-compression`
WARNING: method definition for == at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:68 declares type variable T but does not use it.
WARNING: method definition for getindex at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:120 declares type variable T but does not use it.
WARNING: method definition for getindex at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:120 declares type variable P but does not use it.
WARNING: method definition for canonicalize at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:240 declares type variable L but does not use it.
WARNING: method definition for canonicalize at /usr/share/miniconda/envs/fcpy/share/julia/packages/ChainRulesCore/ctmSK/src/tangent_types/tangent.jl:241 declares type variable L but does not use it.
Plot results#
[6]:
# As data are in xarray, custom plots and comparisons are easy!
# Verbosity here is to showcase full customization
import matplotlib.pyplot as plt
ds_unique_count = suite.custom_metrics[0]
ds_unique_count.q.plot.line(x="bits")
plt.title(f"{ds.q.long_name} in {ds.q.units}")
plt.xlabel("Bits")
plt.ylabel("Number of Unique Values");
[ ]: