Note
Go to the end to download the full example code.
Examples of compressing data when saving to .nc
import xarray as xr
from trajan.readers.omb import read_omb_csv
from pathlib import Path
import os
# Locate the bundled OMB test CSV relative to the parent of the current
# working directory, then parse it into the dataset used in the rest of
# this example.
path_to_test_data = Path.cwd().parent.joinpath(
    "tests", "test_data", "csv", "omb_large.csv"
)
xr_buoys = read_omb_csv(path_to_test_data)
2024-12-16 16:27:18 fv-az1766-447 trajan.readers.omb[2121] DEBUG reading /home/runner/work/trajan/trajan/tests/test_data/csv/omb_large.csv..
2024-12-16 16:27:18 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 2182 is:
Date Time (UTC) 01/Oct/2022 03:22:23
Device 2022_CIRFA_JR_drifter_11_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 75.41388333333333,-3.2745333333333333
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 2182, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:18 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 3230 is:
Date Time (UTC) 14/Sep/2022 16:31:34
Device 2022_CIRFA_JR_drifter_4
Direction MO
Payload NaN
Approx Lat/Lng 69.15595,-20.155216666666668
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 3230, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 4827 is:
Date Time (UTC) 22/Aug/2022 15:33:24
Device 2022_CIRFA_JR_drifter_4
Direction MO
Payload NaN
Approx Lat/Lng 73.25066666666666,-17.29468333333333
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 4827, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 5447 is:
Date Time (UTC) 14/Aug/2022 12:22:59
Device 2022_CIRFA_JR_drifter_11_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 74.4819,-5.097383333333333
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 5447, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 8079 is:
Date Time (UTC) 03/Jul/2022 09:31:30
Device 2022_CIRFA_JR_drifter_5
Direction MO
Payload NaN
Approx Lat/Lng 80.61855,8.161183333333334
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 8079, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 8885 is:
Date Time (UTC) 27/Jun/2022 03:22:21
Device 2022_CIRFA_JR_drifter_12_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 79.44536666666667,0.1955333333333333
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 8885, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 9619 is:
Date Time (UTC) 21/Jun/2022 12:01:44
Device 2022_CIRFA_JR_drifter_1
Direction MO
Payload NaN
Approx Lat/Lng 78.20038333333333,4.55995
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 9619, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:19 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 10220 is:
Date Time (UTC) 16/Jun/2022 18:27:19
Device 2022_CIRFA_JR_drifter_11_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 74.36745,3.3274
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 10220, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 11000 is:
Date Time (UTC) 10/Jun/2022 21:01:33
Device 2022_CIRFA_JR_drifter_1
Direction MO
Payload NaN
Approx Lat/Lng 77.89728333333333,4.9623333333333335
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 11000, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 11375 is:
Date Time (UTC) 08/Jun/2022 06:22:14
Device 2022_CIRFA_JR_drifter_10_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 76.78088333333334,3.2928333333333333
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 11375, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 11735 is:
Date Time (UTC) 05/Jun/2022 21:23:52
Device 2022_CIRFA_JR_drifter_11_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 74.45173333333334,2.9425166666666667
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 11735, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 12611 is:
Date Time (UTC) 31/May/2022 00:23:09
Device 2022_CIRFA_JR_drifter_15_waves_LSM
Direction MO
Payload NaN
Approx Lat/Lng 79.62065,9.593133333333334
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 12611, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15394 is:
Date Time (UTC) 14/May/2022 12:22:13
Device 2022_CIRFA_JR_drifter_9_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 75.38088333333333,11.142883333333334
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 15394, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15624 is:
Date Time (UTC) 13/May/2022 03:52:49
Device 2022_CIRFA_JR_drifter_12_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 78.44003333333333,9.521116666666666
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 15624, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15835 is:
Date Time (UTC) 11/May/2022 22:02:26
Device 2022_CIRFA_JR_drifter_1
Direction MO
Payload NaN
Approx Lat/Lng 78.20156666666666,8.972616666666667
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 15835, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15964 is:
Date Time (UTC) 11/May/2022 04:01:36
Device 2022_CIRFA_JR_drifter_12_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 78.45698333333333,8.991233333333334
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 15964, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 15973 is:
Date Time (UTC) 11/May/2022 03:02:42
Device 2022_CIRFA_JR_drifter_15_waves_LSM
Direction MO
Payload NaN
Approx Lat/Lng 78.34886666666667,7.829066666666667
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 15973, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:20 fv-az1766-447 trajan.readers.omb[2121] DEBUG omb_dataframe at index 16187 is:
Date Time (UTC) 09/May/2022 21:22:21
Device 2022_CIRFA_JR_drifter_12_waves_ISM
Direction MO
Payload NaN
Approx Lat/Lng 78.59153333333333,9.462316666666666
Payload (Text) NaN
Length (Bytes) 0
Credits 1
Name: 16187, dtype: object
this is empty (Length (Bytes) is 0), drop
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:21 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:23 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:24 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:24 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:26 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:27 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG found outlier in sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG start applying sliding_filter_nsigma
2024-12-16 16:27:28 fv-az1766-447 trajan.readers.omb[2121] DEBUG done applying sliding_filter_nsigma
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detecting trajectory dimension
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detecting time-variable for "obs"..
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detected obs-dim: obs, detected time-variable: time.
2024-12-16 16:27:29 fv-az1766-447 trajan.accessor[2121] DEBUG Detected un-structured (2D) trajectory dataset
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
2024-12-16 16:27:29 fv-az1766-447 trajan.traj[2121] DEBUG No grid-mapping specified, checking if coordinates are lon/lat..
# Baseline: write the dataset without passing any encoding.
# By default, to_netcdf does not perform any compression.
xr_buoys.to_netcdf("no_compression.nc")
# Report the size of the file just written, converted from bytes to MB
# (divided by 1024**2) and rounded to 2 decimals.
# On my machine, this is around 33MB.
print(f"size no compression: {round(os.stat('no_compression.nc').st_size/(pow(1024,2)), 2)} MB")
size no compression: 32.03 MB
# compression can be enabled by explicitly providing the right encoding arguments.
# note that the best way to compress may depend on your dataset, the access
# pattern you want to be fastest, etc. - be aware of memory layout and
# performance!
# a simple compression scheme, on a per-trajectory basis: each trajectory is
# compressed as one chunk. This means that it is fast to retrieve one full
# trajectory, but slow to retrieve e.g. the 5th point of all trajectories.
# choose the encoding chunking - this may be application dependent; here we
# chunk each trajectory as a whole
def generate_chunksize(var, dataset=None, dim="trajectory"):
    """Return a chunk shape for ``var`` that spans one entry along ``dim``.

    The returned shape equals the variable's own shape, except along ``dim``
    where it is 1. With the default ``dim="trajectory"`` each trajectory is
    therefore stored as a single chunk.

    Parameters
    ----------
    var
        Name of the variable to look up in ``dataset``.
    dataset
        Mapping from variable names to objects exposing ``dims`` and
        ``shape``. Defaults to the module-level ``xr_buoys`` dataset, which
        keeps the original one-argument call working.
    dim
        Name of the dimension whose chunk length is set to 1.

    Returns
    -------
    tuple
        Chunk sizes, one per dimension of the variable, in dimension order.

    Raises
    ------
    ValueError
        If the variable has no dimension named ``dim``.
    """
    if dataset is None:
        # Fall back to the dataset loaded at the top of the example.
        dataset = xr_buoys

    variable = dataset[var]
    dims = tuple(variable.dims)
    if dim not in dims:
        # Same exception type as the tuple.index() lookup used previously,
        # but with a message that names the offending variable.
        raise ValueError(
            f"variable {var!r} has no {dim!r} dimension; dimensions are {dims}"
        )

    return tuple(
        1 if name == dim else size
        for name, size in zip(dims, variable.shape)
    )
# Build the per-variable encoding handed to to_netcdf: every data variable
# gets zlib compression at level 5, chunked as returned by generate_chunksize.
encoding = dict(
    (
        name,
        {"zlib": True, "complevel": 5, "chunksizes": generate_chunksize(name)},
    )
    for name in xr_buoys.data_vars
)
# Show the encoding chosen for each variable; the "=" f-string specifier
# prints the expression text followed by its value.
# the encoding looks like:
for var in encoding:
print(f"{var}: {encoding[var] = }")
# empty line as a visual separator in the output
print("")
# save, this time with compression: the per-variable encoding built above
# is passed explicitly to to_netcdf
xr_buoys.to_netcdf("trajectory_compression.nc", encoding=encoding)
# Report the size of the compressed file, converted from bytes to MB
# (divided by 1024**2) and rounded to 2 decimals.
# on my machine, this is around 5.6MB
print(f"size with compression: {round(os.stat('trajectory_compression.nc').st_size/(pow(1024,2)), 2)} MB")
time: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 8782)}
lat: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 8782)}
lon: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 8782)}
time_waves_imu: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
accel_energy_spectrum: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464, 55)}
elevation_energy_spectrum: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464, 55)}
processed_elevation_energy_spectrum: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464, 55)}
pcutoff: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
pHs0: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
pT02: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
pT24: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
Hs0: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
T02: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
T24: encoding[var] = {'zlib': True, 'complevel': 5, 'chunksizes': (1, 1464)}
size with compression: 5.53 MB
Total running time of the script: (0 minutes 11.183 seconds)