Examples of compressing data when saving to netCDF (.nc) files

import xarray as xr
from trajan.readers.omb import read_omb_csv
from pathlib import Path
import os
# the test CSV is looked up relative to the parent of the current working
# directory, so this script must be started from a direct sub-folder of the
# repository root
path_to_test_data = Path.cwd().parent.joinpath(
    "tests", "test_data", "csv", "omb_large.csv"
)

# read the CSV file with the OMB reader; the result is the dataset that is
# written to disk in the rest of this example
xr_buoys = read_omb_csv(path_to_test_data)
2024-12-16 16:27:18 fv-az1766-447 trajan.readers.omb[2121] DEBUG reading /home/runner/work/trajan/trajan/tests/test_data/csv/omb_large.csv..
2024-12-16 16:27:18 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 2182 is:
Date Time (UTC)                     01/Oct/2022 03:22:23
Device                2022_CIRFA_JR_drifter_11_waves_ISM
Direction                                             MO
Payload                                              NaN
Approx Lat/Lng     75.41388333333333,-3.2745333333333333
Payload (Text)                                       NaN
Length (Bytes)                                         0
Credits                                                1
Name: 2182, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:18 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 3230 is:
Date Time (UTC)            14/Sep/2022 16:31:34
Device                  2022_CIRFA_JR_drifter_4
Direction                                    MO
Payload                                     NaN
Approx Lat/Lng     69.15595,-20.155216666666668
Payload (Text)                              NaN
Length (Bytes)                                0
Credits                                       1
Name: 3230, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 4827 is:
Date Time (UTC)                    22/Aug/2022 15:33:24
Device                          2022_CIRFA_JR_drifter_4
Direction                                            MO
Payload                                             NaN
Approx Lat/Lng     73.25066666666666,-17.29468333333333
Payload (Text)                                      NaN
Length (Bytes)                                        0
Credits                                               1
Name: 4827, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 5447 is:
Date Time (UTC)                  14/Aug/2022 12:22:59
Device             2022_CIRFA_JR_drifter_11_waves_ISM
Direction                                          MO
Payload                                           NaN
Approx Lat/Lng             74.4819,-5.097383333333333
Payload (Text)                                    NaN
Length (Bytes)                                      0
Credits                                             1
Name: 5447, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 8079 is:
Date Time (UTC)          03/Jul/2022 09:31:30
Device                2022_CIRFA_JR_drifter_5
Direction                                  MO
Payload                                   NaN
Approx Lat/Lng     80.61855,8.161183333333334
Payload (Text)                            NaN
Length (Bytes)                              0
Credits                                     1
Name: 8079, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 8885 is:
Date Time (UTC)                    27/Jun/2022 03:22:21
Device               2022_CIRFA_JR_drifter_12_waves_ISM
Direction                                            MO
Payload                                             NaN
Approx Lat/Lng     79.44536666666667,0.1955333333333333
Payload (Text)                                      NaN
Length (Bytes)                                        0
Credits                                               1
Name: 8885, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 9619 is:
Date Time (UTC)         21/Jun/2022 12:01:44
Device               2022_CIRFA_JR_drifter_1
Direction                                 MO
Payload                                  NaN
Approx Lat/Lng     78.20038333333333,4.55995
Payload (Text)                           NaN
Length (Bytes)                             0
Credits                                    1
Name: 9619, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 10220 is:
Date Time (UTC)                  16/Jun/2022 18:27:19
Device             2022_CIRFA_JR_drifter_11_waves_ISM
Direction                                          MO
Payload                                           NaN
Approx Lat/Lng                        74.36745,3.3274
Payload (Text)                                    NaN
Length (Bytes)                                      0
Credits                                             1
Name: 10220, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 11000 is:
Date Time (UTC)                    10/Jun/2022 21:01:33
Device                          2022_CIRFA_JR_drifter_1
Direction                                            MO
Payload                                             NaN
Approx Lat/Lng     77.89728333333333,4.9623333333333335
Payload (Text)                                      NaN
Length (Bytes)                                        0
Credits                                               1
Name: 11000, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 11375 is:
Date Time (UTC)                    08/Jun/2022 06:22:14
Device               2022_CIRFA_JR_drifter_10_waves_ISM
Direction                                            MO
Payload                                             NaN
Approx Lat/Lng     76.78088333333334,3.2928333333333333
Payload (Text)                                      NaN
Length (Bytes)                                        0
Credits                                               1
Name: 11375, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 11735 is:
Date Time (UTC)                    05/Jun/2022 21:23:52
Device               2022_CIRFA_JR_drifter_11_waves_ISM
Direction                                            MO
Payload                                             NaN
Approx Lat/Lng     74.45173333333334,2.9425166666666667
Payload (Text)                                      NaN
Length (Bytes)                                        0
Credits                                               1
Name: 11735, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 12611 is:
Date Time (UTC)                  31/May/2022 00:23:09
Device             2022_CIRFA_JR_drifter_15_waves_LSM
Direction                                          MO
Payload                                           NaN
Approx Lat/Lng             79.62065,9.593133333333334
Payload (Text)                                    NaN
Length (Bytes)                                      0
Credits                                             1
Name: 12611, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15394 is:
Date Time (UTC)                    14/May/2022 12:22:13
Device                2022_CIRFA_JR_drifter_9_waves_ISM
Direction                                            MO
Payload                                             NaN
Approx Lat/Lng     75.38088333333333,11.142883333333334
Payload (Text)                                      NaN
Length (Bytes)                                        0
Credits                                               1
Name: 15394, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15624 is:
Date Time (UTC)                   13/May/2022 03:52:49
Device              2022_CIRFA_JR_drifter_12_waves_ISM
Direction                                           MO
Payload                                            NaN
Approx Lat/Lng     78.44003333333333,9.521116666666666
Payload (Text)                                     NaN
Length (Bytes)                                       0
Credits                                              1
Name: 15624, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15835 is:
Date Time (UTC)                   11/May/2022 22:02:26
Device                         2022_CIRFA_JR_drifter_1
Direction                                           MO
Payload                                            NaN
Approx Lat/Lng     78.20156666666666,8.972616666666667
Payload (Text)                                     NaN
Length (Bytes)                                       0
Credits                                              1
Name: 15835, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15964 is:
Date Time (UTC)                   11/May/2022 04:01:36
Device              2022_CIRFA_JR_drifter_12_waves_ISM
Direction                                           MO
Payload                                            NaN
Approx Lat/Lng     78.45698333333333,8.991233333333334
Payload (Text)                                     NaN
Length (Bytes)                                       0
Credits                                              1
Name: 15964, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15973 is:
Date Time (UTC)                   11/May/2022 03:02:42
Device              2022_CIRFA_JR_drifter_15_waves_LSM
Direction                                           MO
Payload                                            NaN
Approx Lat/Lng     78.34886666666667,7.829066666666667
Payload (Text)                                     NaN
Length (Bytes)                                       0
Credits                                              1
Name: 15973, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 16187 is:
Date Time (UTC)                   09/May/2022 21:22:21
Device              2022_CIRFA_JR_drifter_12_waves_ISM
Direction                                           MO
Payload                                            NaN
Approx Lat/Lng     78.59153333333333,9.462316666666666
Payload (Text)                                     NaN
Length (Bytes)                                       0
Credits                                              1
Name: 16187, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:23 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:24 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:24 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detecting trajectory dimension
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detecting time-variable for "obs"..
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detected obs-dim: obs, detected time-variable: time.
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detected un-structured (2D) trajectory dataset
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
# without an explicit encoding, to_netcdf stores the data uncompressed
xr_buoys.to_netcdf("no_compression.nc")

# report the size of the written file in MB (1 MB = 1024 * 1024 bytes);
# on the author's machine, this is around 33MB
size_no_compression = os.stat("no_compression.nc").st_size / pow(1024, 2)
print(f"size no compression: {round(size_no_compression, 2)} MB")
size no compression: 32.03 MB
# compression can be enabled by explicitly passing an `encoding` argument to
# to_netcdf. note that the best way to compress may depend on your dataset,
# the access pattern you want to be fastest, etc. - be aware of memory layout
# and performance!

# a simple compression, on a per-trajectory basis: each trajectory will
# be compressed as a chunk, this means that it will be fast to retrieve one
# full trajectory, but slow to retrieve e.g. the 5th point of all trajectories.

# choose the encoding chunking - this may be application dependent, here
# chunk trajectory as a whole
def generate_chunksize(var, dataset=None, dim="trajectory"):
    """Return a chunk shape holding one full entry along ``dim`` per chunk.

    The returned tuple equals the shape of the variable, except that the
    length along ``dim`` is replaced by 1. Used as ``chunksizes`` in the
    encoding, this makes every trajectory its own chunk.

    Parameters
    ----------
    var : hashable
        Name of the variable to look up in ``dataset``.
    dataset : mapping, optional
        Object indexable by ``var`` whose items expose ``dims`` and ``shape``.
        Defaults to the module-level ``xr_buoys`` dataset.
    dim : str, optional
        Name of the dimension to split into chunks of length 1.

    Returns
    -------
    tuple
        The chunk shape, one entry per dimension of the variable.

    Raises
    ------
    ValueError
        If the variable does not have a dimension named ``dim``.
    """
    if dataset is None:
        # keep the original one-argument call working: fall back to the
        # dataset loaded at the top of this example
        dataset = xr_buoys

    variable = dataset[var]
    dims = tuple(variable.dims)

    if dim not in dims:
        # same exception type as the bare tuple.index() failure, but with a
        # message that names the offending variable
        raise ValueError(
            f"variable {var!r} has no {dim!r} dimension (dims: {dims})"
        )

    chunk_shape = list(variable.shape)
    chunk_shape[dims.index(dim)] = 1

    return tuple(chunk_shape)


# set the encoding for each variable
encoding = {
    var: {"zlib": True, "complevel": 5, "chunksizes": generate_chunksize(var)} \
        for var in xr_buoys.data_vars
}

# show the encoding chosen for every variable; the "=" specifier echoes the
# expression text itself, so the loop variable name is part of the output
for var in encoding:
    print(f"{var}: {encoding[var] = }")
print()

# write the dataset again, this time passing the compression settings
xr_buoys.to_netcdf("trajectory_compression.nc", encoding=encoding)

# report the size of the compressed file in MB;
# on the author's machine, this is around 5.6MB
size_with_compression = os.stat("trajectory_compression.nc").st_size / pow(1024, 2)
print(f"size with compression: {round(size_with_compression, 2)} MB")
time: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 8782)}
lat: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 8782)}
lon: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 8782)}
time_waves_imu: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
accel_energy_spectrum: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464, 55)}
elevation_energy_spectrum: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464, 55)}
processed_elevation_energy_spectrum: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464, 55)}
pcutoff: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
pHs0: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
pT02: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
pT24: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
Hs0: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
T02: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
T24: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}

size with compression: 5.53 MB

Total running time of the script: (0 minutes 11.183 seconds)

Gallery generated by Sphinx-Gallery