"""
Trajan API
"""
import importlib.metadata
import collections
import numpy as np
import xarray as xr
import pandas as pd
import logging
from . import accessor as _
from . import traj as _
from . import skill as _
from . import readers as _
logger = logging.getLogger(__name__)
__version__ = importlib.metadata.version("trajan")
[docs]
def read_csv(f, **kwargs):
"""
Construct a CF-compliant trajectory dataset from a CSV file. A thin wrapper around: :meth:`from_dataframe`.
"""
df = pd.read_csv(f)
return from_dataframe(df, **kwargs)
[docs]
def from_dataframe(df: pd.DataFrame,
lon='lon',
lat='lat',
time='time',
name=None,
*,
__test_condense__=False):
"""
Construct a CF-compliant trajectory dataset from a `pd.DataFrame` of positions.
Args:
`lon`: str
Name of column containing longitudes.
`lat`: str
Name of column containing latitudes.
`time`: str
Name of column containing timestamps (parsable by pandas).
`name`: str
Name of column to be used for drifter names.
Returns:
`ds`: a CF-compliant trajectory `xarray.Dataset`.
Constructing a dataset from arrays of postions and time:
--------------------------------------------------------
.. testcode::
import pandas as pd
import trajan as ta
# Construct some synthetic data
lon = np.linspace(5, 10, 50)
lat = np.linspace(60, 70, 50)
temp = np.linspace(10, 15, 50)
time = pd.date_range('2023-01-01', '2023-01-14', periods=50)
# Construct a pandas.DataFrame
ds = pd.DataFrame({'lon': lon, 'lat': lat, 'time': time, 'temp': temp, 'name': 'My drifter'})
ds = ta.from_dataframe(ds, name='name')
print(ds)
.. testoutput::
<xarray.Dataset>
Dimensions: (trajectory: 1, obs: 50)
Coordinates:
* trajectory (trajectory) <U10 'My drifter'
* obs (obs) int64 0 1 2 3 4 5 6 7 8 9 ... 41 42 43 44 45 46 47 48 49
Data variables:
lon (trajectory, obs) float64 5.0 5.102 5.204 ... 9.796 9.898 10.0
lat (trajectory, obs) float64 60.0 60.2 60.41 ... 69.59 69.8 70.0
time (trajectory, obs) datetime64[ns] 2023-01-01 ... 2023-01-14
temp (trajectory, obs) float64 10.0 10.1 10.2 ... 14.8 14.9 15.0
Attributes:
Conventions: CF-1.10
featureType: trajectory
geospatial_lat_min: 60.0
geospatial_lat_max: 70.0
geospatial_lon_min: 5.0
geospatial_lon_max: 10.0
time_coverage_start: 2023-01-01T00:00:00
time_coverage_end: 2023-01-14T00:00:00
Often you might want to add some attributes:
.. testcode::
ds = ds.assign_attrs({'Author': 'Albus Dumbledore'})
print(ds)
.. testoutput::
<xarray.Dataset>
Dimensions: (trajectory: 1, obs: 50)
Coordinates:
* trajectory (trajectory) <U10 'My drifter'
* obs (obs) int64 0 1 2 3 4 5 6 7 8 9 ... 41 42 43 44 45 46 47 48 49
Data variables:
lon (trajectory, obs) float64 5.0 5.102 5.204 ... 9.796 9.898 10.0
lat (trajectory, obs) float64 60.0 60.2 60.41 ... 69.59 69.8 70.0
time (trajectory, obs) datetime64[ns] 2023-01-01 ... 2023-01-14
temp (trajectory, obs) float64 10.0 10.1 10.2 ... 14.8 14.9 15.0
Attributes:
Conventions: CF-1.10
featureType: trajectory
geospatial_lat_min: 60.0
geospatial_lat_max: 70.0
geospatial_lon_min: 5.0
geospatial_lon_max: 10.0
time_coverage_start: 2023-01-01T00:00:00
time_coverage_end: 2023-01-14T00:00:00
Author: Albus Dumbledore
"""
df = df.copy()
df.index.names = ['obs']
df[time] = pd.to_datetime(df[time], format='mixed')
df = df.rename(columns={lat: 'lat', lon: 'lon', time: 'time'})
if name is not None:
if name in df.columns:
df = df.rename(columns={name: 'trajectory'})
else:
df['trajectory'] = name
else:
df['trajectory'] = 'Drifter 1'
# Convert to UTC and remove tz-info. Xarray does not support tz-aware
# datetimes.
if df['time'].dt.tz is not None:
df['time'] = df['time'].dt.tz_convert(None)
# Classify trajectories based on drifter names.
df = df.set_index(['trajectory', df.index])
df = df.to_xarray()
if not __test_condense__:
df = df.traj.condense_obs()
df = df.traj.assign_cf_attrs()
return df
[docs]
def trajectory_dict_to_dataset(trajectory_dict,
variable_attributes=None,
global_attributes=None):
"""Create a CF-compatible trajectory file from dictionary of drifter positions
Trajectory_dict shall have the following structure::
{'buoy1_name': {
time0: {'lon': lon0, 'lat': lat0, 'variable1': var1_0, ... 'variableM': varM_0},
time1: {'lon': lon1, 'lat': lat1, 'variable1': var1_1, ... 'variableM': varM_1},
...
timeN: {'lon': lonN, 'lat': latN, 'variable1': var1_N, ... 'variableM': varM_N}},
{'buoy2_name': {
...
"""
if variable_attributes is None:
variable_attributes = dict()
if global_attributes is None:
global_attributes = dict()
drifter_names = list(trajectory_dict)
num_drifters = len(drifter_names)
num_times = np.max([len(d) for dn, d in trajectory_dict.items()])
# Allocate arrays
variables = []
for dn, d in trajectory_dict.items():
v = list(d[list(d)[0]])
for var in v:
if var not in variables:
variables.extend([var])
datavars = {}
for var in variables:
datavars[var] = np.empty((num_drifters, num_times))
datavars[var][:] = np.nan
if var not in variable_attributes:
if var == 'lat':
variable_attributes[var] = {
'standard_name': 'latitude',
'units': 'degree_north'
}
elif var == 'lon':
variable_attributes[var] = {
'standard_name': 'longitude',
'units': 'degree_east'
}
else:
variable_attributes[var] = {}
time = np.empty((num_drifters, num_times), dtype='datetime64[s]')
time[:] = np.datetime64('nat')
# Fill arrays with data from dictionaries
for drifter_num, (drifter_name,
drifter_dict) in enumerate(trajectory_dict.items()):
# Make sure that positions are sorted by time
drifter_dict = collections.OrderedDict(sorted(drifter_dict.items()))
t = slice(0, len(drifter_dict))
for var in variables:
datavars[var][drifter_num, t] = np.array(
[di[var] for d, di in drifter_dict.items()])
time[drifter_num, t] = np.array(list(drifter_dict),
dtype='datetime64[s]')
datavars = {
v: (['trajectory', 'obs'], datavars[v], variable_attributes[v])
for v in variables
}
datavars['time'] = (['trajectory', 'obs'], time, {'standard_name': 'time'})
datavars['drifter_names'] = (['trajectory'], drifter_names, {
'cf_role': 'trajectory_id',
'standard_name': 'platform_id'
})
# Remove empty attributes
global_attributes = {a: v for a, v in global_attributes.items() if v != ''}
# Create Xarray Dataset adhering to CF conventions h.4.1 for trajectory data
ds = xr.Dataset(data_vars=datavars,
attrs={
'Conventions': 'CF-1.10',
'featureType': 'trajectory',
'geospatial_lat_min': np.nanmin(datavars['lat'][1]),
'geospatial_lat_max': np.nanmax(datavars['lat'][1]),
'geospatial_lon_min': np.nanmin(datavars['lon'][1]),
'geospatial_lon_max': np.nanmax(datavars['lon'][1]),
'time_coverage_start':
str(np.nanmin(datavars['time'][1])),
'time_coverage_end':
str(np.nanmax(datavars['time'][1])),
**global_attributes
})
return ds