"""
Input output functions (but mostly input)
"""
from __future__ import division
import os
import pickle
import warnings
from datetime import datetime
import numpy as np
import netCDF4
from salem.utils import memory, cached_shapefile_path
from salem import gis, utils, wgs84, wrftools, proj_to_cartopy
# Optional dependency: xarray. When available, the real accessors and the
# NetCDF4 data store are used; when not, no-op stand-ins keep this module
# importable (the accessor decorators then simply return the class unchanged).
try:
    import xarray as xr
    from xarray.backends.netCDF4_ import NetCDF4DataStore, close_on_error, \
        _nc4_group
    has_xarray = True
except ImportError:
    has_xarray = False
    # dummy replacement so that it compiles
    class xr(object):
        pass
    class NullDecl(object):
        # Decorator factory that ignores its argument and returns the
        # decorated function unchanged (stand-in for xr.register_*_accessor).
        def __init__(self, dummy):
            pass
        def __call__(self, func):
            return func
    xr.register_dataset_accessor = NullDecl
    xr.register_dataarray_accessor = NullDecl
    NetCDF4DataStore = object
def read_shapefile(fpath, cached=False):
    """Reads a shapefile using geopandas.

    For convenience, it adds four columns to the dataframe:
    [min_x, max_x, min_y, max_y]

    Because reading a shapefile can take a long time, Salem provides a
    caching utility (cached=True). This will save a pickle of the shapefile
    in the cache directory.

    Parameters
    ----------
    fpath : str
        path to the shapefile (``.shp``) or to a cached pickle (``.p``)
    cached : bool
        read from (or create) a pickled version of the shapefile

    Returns
    -------
    a geopandas.GeoDataFrame

    Raises
    ------
    ValueError
        if the file extension is neither ``.shp`` nor ``.p``
    """
    import geopandas as gpd

    # reject unknown extensions right away (guard clause)
    _, ext = os.path.splitext(fpath)
    if ext.lower() not in ['.shp', '.p']:
        raise ValueError('File extension not recognised: {}'.format(ext))

    if cached:
        cpath = cached_shapefile_path(fpath)
        # unpickle if cached, read and pickle if not
        if os.path.exists(cpath):
            with open(cpath, 'rb') as f:
                out = pickle.load(f)
        else:
            out = read_shapefile(fpath, cached=False)
            with open(cpath, 'wb') as f:
                pickle.dump(out, f)
    else:
        out = gpd.read_file(fpath)
        # store each geometry's bounding box once; used later for fast
        # spatial subsetting (see _memory_transform)
        out['min_x'] = [g.bounds[0] for g in out.geometry]
        out['max_x'] = [g.bounds[2] for g in out.geometry]
        out['min_y'] = [g.bounds[1] for g in out.geometry]
        out['max_y'] = [g.bounds[3] for g in out.geometry]
    return out
@memory.cache(ignore=['grid'])
def _memory_transform(shape_cpath, grid=None, grid_str=None):
    """Quick solution using joblib in order to not transform many times the
    same shape (useful for maps).

    Since grid is a complex object joblib seemed to have trouble with it,
    so joblib is checking its cache according to grid_str while the job is
    done with grid.
    """
    shape = read_shapefile(shape_cpath, cached=True)
    e = grid.extent_in_crs(crs=shape.crs)
    # Discard geometries whose bounding box does not intersect the grid
    # extent. The comparisons below imply e is ordered
    # [xmin, xmax, ymin, ymax] — presumably what extent_in_crs returns;
    # verify against gis.Grid if this ever changes.
    p = np.nonzero(~((shape['min_x'] > e[1]) |
                     (shape['max_x'] < e[0]) |
                     (shape['min_y'] > e[3]) |
                     (shape['max_y'] < e[2])))
    shape = shape.iloc[p]
    # reproject the surviving geometries onto the grid (in place)
    shape = gis.transform_geopandas(shape, to_crs=grid, inplace=True)
    return shape
def read_shapefile_to_grid(fpath, grid):
    """Same as read_shapefile but directly transformed to a grid.

    The whole thing is cached so that the second call will
    be much faster.

    Parameters
    ----------
    fpath : str
        path to the file
    grid : salem.Grid
        the arrival grid

    Returns
    -------
    a geopandas.GeoDataFrame, reprojected onto ``grid``
    """
    # ensure it is a cached pickle (copy code smell)
    shape_cpath = cached_shapefile_path(fpath)
    if not os.path.exists(shape_cpath):
        out = read_shapefile(fpath, cached=False)
        with open(shape_cpath, 'wb') as f:
            pickle.dump(out, f)

    # grid_str is only used as the joblib cache key (grid itself is ignored
    # by the cache, see _memory_transform)
    return _memory_transform(shape_cpath, grid=grid, grid_str=str(grid))
# TODO: remove this once we sure that we have all WRF files right
# When True, _wrf_grid_from_dataset asserts that the grid it reconstructs
# matches the lon/lat coordinates stored in the file (temporary sanity check).
tmp_check_wrf = True
def _wrf_grid_from_dataset(ds):
    """Get the WRF projection out of the file.

    Reconstructs a salem Grid from the global attributes of a WRF (or
    HAR / TU Berlin) file. ``ds`` can be a netCDF4 or an xarray dataset.
    Raises NotImplementedError for projections other than Lambert (1)
    and Mercator (3), RuntimeError if the proj4 string is not understood.
    """

    pargs = dict()
    if hasattr(ds, 'PROJ_ENVI_STRING'):
        # HAR and other TU Berlin files
        dx = ds.GRID_DX
        dy = ds.GRID_DY
        pargs['lat_1'] = ds.PROJ_STANDARD_PAR1
        pargs['lat_2'] = ds.PROJ_STANDARD_PAR2
        pargs['lat_0'] = ds.PROJ_CENTRAL_LAT
        pargs['lon_0'] = ds.PROJ_CENTRAL_LON
        pargs['center_lon'] = ds.PROJ_CENTRAL_LON
        if ds.PROJ_NAME == 'Lambert Conformal Conic':
            proj_id = 1
        else:
            proj_id = 99  # pragma: no cover
    else:
        # Normal WRF file
        cen_lon = ds.CEN_LON
        cen_lat = ds.CEN_LAT
        dx = ds.DX
        dy = ds.DY
        pargs['lat_1'] = ds.TRUELAT1
        pargs['lat_2'] = ds.TRUELAT2
        pargs['lat_0'] = ds.MOAD_CEN_LAT
        pargs['lon_0'] = ds.STAND_LON
        pargs['center_lon'] = ds.CEN_LON
        proj_id = ds.MAP_PROJ

    if proj_id == 1:
        # Lambert (note the spherical datum, radius 6370 km, as used by WRF)
        p4 = '+proj=lcc +lat_1={lat_1} +lat_2={lat_2} ' \
             '+lat_0={lat_0} +lon_0={lon_0} ' \
             '+x_0=0 +y_0=0 +a=6370000 +b=6370000'
        p4 = p4.format(**pargs)
    elif proj_id == 3:
        # Mercator
        p4 = '+proj=merc +lat_ts={lat_1} ' \
             '+lon_0={center_lon} ' \
             '+x_0=0 +y_0=0 +a=6370000 +b=6370000'
        p4 = p4.format(**pargs)
    else:
        raise NotImplementedError('WRF proj not implemented yet: '
                                  '{}'.format(proj_id))

    proj = gis.check_crs(p4)
    if proj is None:
        raise RuntimeError('WRF proj not understood: {}'.format(p4))

    # Here we have to accept xarray and netCDF4 datasets
    try:
        # netCDF4: dimensions maps to Dimension objects with a length
        nx = len(ds.dimensions['west_east'])
        ny = len(ds.dimensions['south_north'])
    except AttributeError:
        # maybe an xarray dataset
        nx = ds.dims['west_east']
        ny = ds.dims['south_north']
    if hasattr(ds, 'PROJ_ENVI_STRING'):
        # HAR: lower-left corner is stored explicitly
        x0 = ds.GRID_X00
        y0 = ds.GRID_Y00
    else:
        # Normal WRF file: compute the lower-left corner from the projected
        # grid center
        e, n = gis.transform_proj(wgs84, proj, cen_lon, cen_lat)
        x0 = -(nx-1) / 2. * dx + e  # DL corner
        y0 = -(ny-1) / 2. * dy + n  # DL corner
    grid = gis.Grid(nxny=(nx, ny), ll_corner=(x0, y0), dxdy=(dx, dy),
                    proj=proj)

    if tmp_check_wrf:
        # Temporary asserts: the reconstructed grid must match the lon/lat
        # coordinates stored in the file
        if 'XLONG' in ds.variables:
            # Normal WRF
            mylon, mylat = grid.ll_coordinates
            reflon = ds.variables['XLONG']
            reflat = ds.variables['XLAT']
            if len(reflon.shape) == 3:
                # coordinates carry a (constant) time dimension
                reflon = reflon[0, :, :]
                reflat = reflat[0, :, :]
            assert np.allclose(reflon, mylon, atol=1e-4)
            assert np.allclose(reflat, mylat, atol=1e-4)
        if 'lon' in ds.variables:
            # HAR
            mylon, mylat = grid.ll_coordinates
            reflon = ds.variables['lon']
            reflat = ds.variables['lat']
            if len(reflon.shape) == 3:
                reflon = reflon[0, :, :]
                reflat = reflat[0, :, :]
            assert np.allclose(reflon, mylon, atol=1e-4)
            assert np.allclose(reflat, mylat, atol=1e-4)

    return grid
def _lonlat_grid_from_dataset(ds):
    """Seek for longitude and latitude coordinates.

    Returns a salem Grid in plate carree (wgs84) if 1D lon/lat coordinate
    variables can be identified, None otherwise.
    """

    # Do we have some standard names as variable?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
            # stop at the first match (previously the last match won,
            # contradicting the comment above)
            break
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
            break
    if (x is None) or (y is None):
        return None

    # OK, get it
    lon = ds.variables[x][:]
    lat = ds.variables[y][:]

    # double check for dubious variables
    if not utils.str_in_list([x], utils.valid_names['lon_var']) or \
            not utils.str_in_list([y], utils.valid_names['lat_var']):
        # name not usual. see if at least the range follows some conv
        if (np.max(np.abs(lon)) > 360.1) or (np.max(np.abs(lat)) > 90.1):
            return None

    # Make the grid
    dx = lon[1]-lon[0]
    dy = lat[1]-lat[0]
    args = dict(nxny=(lon.shape[0], lat.shape[0]), proj=wgs84, dxdy=(dx, dy))
    args['corner'] = (lon[0], lat[0])
    return gis.Grid(**args)
def _salem_grid_from_dataset(ds):
    """Seek for coordinates that Salem might have created.

    Current convention: x_coord, y_coord, pyproj_srs as attribute

    Returns a salem Grid if the convention is matched, None otherwise.
    """

    # Projection
    try:
        proj = ds.pyproj_srs
    except AttributeError:
        proj = None
    proj = gis.check_crs(proj)
    if proj is None:
        return None

    # Do we have some standard names as variable?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
            # stop at the first match (previously the last match won,
            # contradicting the comment above)
            break
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
            break
    if (x is None) or (y is None):
        return None

    # OK, get it
    x = ds.variables[x][:]
    y = ds.variables[y][:]

    # Make the grid
    dx = x[1]-x[0]
    dy = y[1]-y[0]
    args = dict(nxny=(x.shape[0], y.shape[0]), proj=proj, dxdy=(dx, dy))
    args['corner'] = (x[0], y[0])
    return gis.Grid(**args)
def grid_from_dataset(ds):
    """Find out if the dataset contains enough info for Salem to understand.

    ds can be an xarray dataset or a NetCDF dataset,
    or anything that resembles it.

    Returns a :py:class:`~salem.Grid` if successful, None otherwise
    """

    # A Salem file carries its own georeferencing convention: try that first
    grid = _salem_grid_from_dataset(ds)
    if grid is not None:
        return grid

    # WRF and HAR files are recognisable by their special global attributes
    if hasattr(ds, 'MOAD_CEN_LAT') or hasattr(ds, 'PROJ_ENVI_STRING'):
        return _wrf_grid_from_dataset(ds)

    # Last resort: plain lon/lat (plate carree) coordinates
    return _lonlat_grid_from_dataset(ds)
def netcdf_time(ncobj, monthbegin=False):
    """Check if the netcdf file contains a time that Salem understands."""

    import pandas as pd

    time = None
    candidates = utils.str_in_list(ncobj.variables.keys(),
                                   utils.valid_names['time_var'])
    if not candidates:
        # no time variable
        return None
    vt = candidates[0]

    if hasattr(ncobj, 'TITLE') and 'GEOGRID' in ncobj.TITLE:
        # geogrid file: no usable time information
        pass
    elif ncobj[vt].dtype in ['|S1', '|S19']:
        # WRF file: times are stored as character arrays
        try:
            stimes = ncobj.variables['Times'][:].values
        except AttributeError:
            stimes = ncobj.variables['Times'][:]
        time = [pd.to_datetime(t.tostring().decode(),
                               errors='raise',
                               format='%Y-%m-%d_%H:%M:%S')
                for t in stimes]
    elif vt is not None:
        # CF time
        var = ncobj.variables[vt]
        time = netCDF4.num2date(var[:], var.units)
        if monthbegin:
            # sometimes monthly data is centered in the month (stupid)
            time = [datetime(t.year, t.month, 1) for t in time]

    return time
class _XarrayAccessorBase(object):
    """Common logic for both data structures (DataArray and Dataset).

    http://xarray.pydata.org/en/stable/internals.html#extending-xarray
    """

    def __init__(self, xarray_obj):

        self._obj = xarray_obj

        if isinstance(xarray_obj, xr.DataArray):
            xarray_obj = xarray_obj.to_dataset(name='var')
            try:  # maybe there was already some georef
                xarray_obj.attrs['pyproj_srs'] = xarray_obj['var'].pyproj_srs
            except AttributeError:
                # narrowed from a bare except: only the missing attribute
                # is an expected failure here
                pass

        self.grid = grid_from_dataset(xarray_obj)
        if self.grid is None:
            raise RuntimeError('dataset Grid not understood.')

        # identify the spatial (mandatory) and time/vertical (optional) dims
        dn = xarray_obj.dims.keys()
        self.x_dim = utils.str_in_list(dn, utils.valid_names['x_dim'])[0]
        self.y_dim = utils.str_in_list(dn, utils.valid_names['y_dim'])[0]
        dim = utils.str_in_list(dn, utils.valid_names['t_dim'])
        self.t_dim = dim[0] if dim else None
        dim = utils.str_in_list(dn, utils.valid_names['z_dim'])
        self.z_dim = dim[0] if dim else None

    def subset(self, margin=0, ds=None, **kwargs):
        """subset(self, margin=0, shape=None, geometry=None, grid=None,
        corners=None, crs=wgs84, roi=None)

        Get a subset of the dataset.

        Accepts all keywords of :py:func:`~Grid.roi`

        Parameters
        ----------
        ds : Dataset or DataArray
            form the ROI from the extent of the Dataset or DataArray
        shape : str
            path to a shapefile
        geometry : geometry
            a shapely geometry (don't forget the crs keyword)
        grid : Grid
            a Grid object which extent will form the ROI
        corners : tuple
            a ((x0, y0), (x1, y1)) tuple of the corners of the square
            to subset the dataset to (don't forget the crs keyword)
        crs : crs, default wgs84
            coordinate reference system of the geometry and corners
        roi : ndarray
            a mask for the region of interest to subset the dataset onto
        margin : int
            add a margin to the region to subset (can be negative!). Can
            be used a single keyword, too: set_subset(margin=-5) will remove
            five pixels from each boundary of the dataset.
        """
        if ds is not None:
            grid = ds.salem.grid
            kwargs.setdefault('grid', grid)

        mask = self.grid.region_of_interest(**kwargs)
        if not kwargs:
            # user just wants a margin: select everything, margin applied below
            mask[:] = 1

        # bounding box of the masked region, enlarged (or shrunk) by margin
        ids = np.nonzero(mask)
        sub_x = [np.min(ids[1]) - margin, np.max(ids[1]) + margin]
        sub_y = [np.min(ids[0]) - margin, np.max(ids[0]) + margin]

        out_ds = self._obj[{self.x_dim: slice(sub_x[0], sub_x[1]+1),
                            self.y_dim: slice(sub_y[0], sub_y[1]+1)}
                           ]
        return out_ds

    def roi(self, ds=None, **kwargs):
        """roi(self, shape=None, geometry=None, grid=None, corners=None,
        crs=wgs84, roi=None)

        Make a region of interest (ROI) for the dataset.

        All grid points outside the ROI will be masked out.

        Parameters
        ----------
        ds : Dataset or DataArray
            form the ROI from the extent of the Dataset or DataArray
        shape : str
            path to a shapefile
        geometry : geometry
            a shapely geometry (don't forget the crs keyword)
        grid : Grid
            a Grid object which extent will form the ROI
        corners : tuple
            a ((x0, y0), (x1, y1)) tuple of the corners of the square
            to subset the dataset to (don't forget the crs keyword)
        crs : crs, default wgs84
            coordinate reference system of the geometry and corners
        roi : ndarray
            if you have a mask ready, you can give it here
        """
        if ds is not None:
            grid = ds.salem.grid
            kwargs.setdefault('grid', grid)

        mask = self.grid.region_of_interest(**kwargs)
        coords = {self.y_dim: self._obj[self.y_dim].values,
                  self.x_dim: self._obj[self.x_dim].values}
        mask = xr.DataArray(mask, coords=coords,
                            dims=(self.y_dim, self.x_dim))

        # where() drops attrs: restore them afterwards
        out = self._obj.where(mask)

        # keep attrs
        out.attrs = self._obj.attrs
        if isinstance(out, xr.DataArray):
            out.name = self._obj.name

        # add pyproj string everywhere
        out.attrs['pyproj_srs'] = self.grid.proj.srs
        if isinstance(out, xr.Dataset):
            for v in out.variables:
                out.variables[v].attrs = self._obj.variables[v].attrs
                out.variables[v].attrs['pyproj_srs'] = self.grid.proj.srs
        return out

    def get_map(self, **kwargs):
        """Make a salem.Map out of the dataset.

        All keywords are passed to :py:class:salem.Map
        """
        from salem.graphics import Map
        return Map(self.grid, **kwargs)

    def _quick_map(self, obj, ax=None, interp='nearest', **kwargs):
        """Make a plot of a data array."""

        # some metadata?
        title = obj.name or ''
        if obj._title_for_slice():
            title += ' (' + obj._title_for_slice() + ')'
        cb = obj.attrs['units'] if 'units' in obj.attrs else ''

        smap = self.get_map(**kwargs)
        smap.set_data(obj.values, interp=interp)
        smap.visualize(ax=ax, title=title, cbar_title=cb)
        return smap

    def cartopy(self):
        """Get a cartopy.crs.Projection for this dataset."""
        return proj_to_cartopy(self.grid.proj)

    def transform(self, other, grid=None, interp='nearest', ks=3):
        """Reprojects an other Dataset or DataArray onto this grid.

        Parameters
        ----------
        other: Dataset, DataArray or ndarray
            the data to project onto self
        grid: salem.Grid
            in case the input dataset does not carry georef info
        interp : str
            'nearest' (default), 'linear', or 'spline'
        ks : int
            Degree of the bivariate spline. Default is 3.

        Returns
        -------
        a dataset or a dataarray
        """

        was_dataarray = False
        if not isinstance(other, xr.Dataset):
            try:
                other = other.to_dataset(name=other.name)
                was_dataarray = True
            except AttributeError:
                # must be a ndarray
                rdata = self.grid.map_gridded_data(other, grid=grid,
                                                   interp=interp, ks=ks)
                # let's guess the dimensions from the shape
                sh = rdata.shape
                nd = len(sh)
                if nd == 2:
                    dims = (self.y_dim, self.x_dim)
                elif nd == 3:
                    newdim = 'new_dim'
                    if self.t_dim and sh[0] == self._obj.dims[self.t_dim]:
                        newdim = self.t_dim
                    if self.z_dim and sh[0] == self._obj.dims[self.z_dim]:
                        newdim = self.z_dim
                    dims = (newdim, self.y_dim, self.x_dim)
                else:
                    raise NotImplementedError('more than 3 dims not ok yet.')

                coords = {}
                for d in dims:
                    if d in self._obj:
                        coords[d] = self._obj[d]

                out = xr.DataArray(rdata, coords=coords, dims=dims)
                out.attrs['pyproj_srs'] = self.grid.proj.srs
                return out

        # go
        out = xr.Dataset()
        for v in other.data_vars:
            var = other[v]
            rdata = self.grid.map_gridded_data(var, interp=interp, ks=ks)

            # remove old coords and replace the spatial dims with ours
            dims = [d for d in var.dims]
            coords = {}
            for c in var.coords:
                n = utils.str_in_list([c], utils.valid_names['x_dim'])
                if n:
                    dims = [self.x_dim if x in n else x for x in dims]
                    continue
                n = utils.str_in_list([c], utils.valid_names['y_dim'])
                if n:
                    dims = [self.y_dim if x in n else x for x in dims]
                    continue
                coords[c] = var.coords[c]
            # add new ones
            coords[self.x_dim] = self._obj[self.x_dim]
            coords[self.y_dim] = self._obj[self.y_dim]

            rdata = xr.DataArray(rdata, coords=coords, attrs=var.attrs,
                                 dims=dims)
            rdata.attrs['pyproj_srs'] = self.grid.proj.srs
            out[v] = rdata

        if was_dataarray:
            # single-variable input: unwrap (v is the last loop variable)
            out = out[v]
        else:
            out.attrs['pyproj_srs'] = self.grid.proj.srs

        return out
@xr.register_dataarray_accessor('salem')
class DataArrayAccessor(_XarrayAccessorBase):
    """Salem accessor for xarray DataArrays (``da.salem``)."""

    def quick_map(self, ax=None, interp='nearest', **kwargs):
        """Make a plot of the DataArray."""
        return self._quick_map(self._obj, ax=ax, interp=interp, **kwargs)

    def interpz(self, zcoord, levels, dim_name='', use_multiprocessing=True):
        """Interpolates the array along the vertical dimension

        Parameters
        ----------
        zcoord: DataArray
            the z coordinates of the variable. Must be of same dimensions
        levels: 1dArray
            the levels at which to interpolate
        dim_name: str
            the name of the new dimension
        use_multiprocessing: bool
            set to false if, for some reason, you don't want to use mp

        Returns
        -------
        a new DataArray with the interpolated data
        """
        if self.z_dim is None:
            raise RuntimeError('zdimension not recognized')

        data = wrftools.interp3d(self._obj.values, zcoord.values,
                                 np.atleast_1d(levels), use_multiprocessing=
                                 use_multiprocessing)

        # replace the vertical dim with the new level dim
        dims = list(self._obj.dims)
        zd = np.nonzero([self.z_dim == d for d in dims])[0][0]
        dims[zd] = dim_name
        coords = dict(self._obj.coords)
        coords.pop(self.z_dim, None)
        coords[dim_name] = np.atleast_1d(levels)
        out = xr.DataArray(data, name=self._obj.name, dims=dims,
                           coords=coords)
        out.attrs['pyproj_srs'] = self.grid.proj.srs
        if not np.asarray(levels).shape:
            # a scalar level was requested: drop the singleton dimension
            out = out.isel(**{dim_name: 0})
        return out
@xr.register_dataset_accessor('salem')
class DatasetAccessor(_XarrayAccessorBase):
    """Salem accessor for xarray Datasets (``ds.salem``)."""

    def quick_map(self, varname, ax=None, interp='nearest', **kwargs):
        """Make a plot of a variable of the DataSet."""
        return self._quick_map(self._obj[varname], ax=ax, interp=interp,
                               **kwargs)

    def wrf_zlevel(self, varname, levels=None, use_multiprocessing=True):
        """Interpolates to a specified height above sea level.

        Parameters
        ----------
        varname: str
            the name of the variable to interpolate
        levels: 1d array
            levels at which to interpolate (default: some levels I thought of)
        use_multiprocessing: bool
            set to false if, for some reason, you don't want to use mp

        Returns
        -------
        an interpolated DataArray
        """
        if levels is None:
            levels = np.array([10, 20, 30, 50, 75, 100, 200, 300, 500, 750,
                               1000, 2000, 3000, 5000, 7500, 10000])

        # 'Z' is expected to be provided by the WRF diagnostic variables
        zcoord = self._obj['Z']
        out = self._obj[varname].salem.interpz(zcoord, levels, dim_name='z',
                                               use_multiprocessing=
                                               use_multiprocessing)
        out['z'].attrs['description'] = 'height above sea level'
        out['z'].attrs['units'] = 'm'
        return out

    def wrf_plevel(self, varname, levels=None, use_multiprocessing=True):
        """Interpolates to a specified pressure level (hPa).

        Parameters
        ----------
        varname: str
            the name of the variable to interpolate
        levels: 1d array
            levels at which to interpolate (default: some levels I thought of)
        use_multiprocessing: bool
            set to false if, for some reason, you don't want to use mp

        Returns
        -------
        an interpolated DataArray
        """
        if levels is None:
            levels = np.array([1000, 975, 950, 925, 900, 850, 800, 750, 700,
                               650, 600, 550, 500, 450, 400, 300, 200, 100])

        # 'PRESSURE' is expected to be provided by the WRF diagnostics
        zcoord = self._obj['PRESSURE']
        out = self._obj[varname].salem.interpz(zcoord, levels, dim_name='p',
                                               use_multiprocessing=
                                               use_multiprocessing)
        out['p'].attrs['description'] = 'pressure'
        out['p'].attrs['units'] = 'hPa'
        return out
class _NetCDF4DataStore(NetCDF4DataStore):
    """Just another way to init xarray's datastore

    The extra ``ds`` keyword accepts an already-opened netCDF4.Dataset,
    so that Salem can patch its variables (unstaggering, diagnostics)
    before handing it to xarray.
    """
    def __init__(self, filename, mode='r', format='NETCDF4', group=None,
                 writer=None, clobber=True, diskless=False, persist=False,
                 ds=None):
        import netCDF4 as nc4
        if format is None:
            format = 'NETCDF4'
        if ds is None:
            # no pre-opened dataset given: open the file ourselves
            ds = nc4.Dataset(filename, mode=mode, clobber=clobber,
                             diskless=diskless, persist=persist,
                             format=format)
        with close_on_error(ds):
            self.ds = _nc4_group(ds, group, mode)
        self.format = format
        self.is_remote = False
        self._filename = filename
        # NOTE: this deliberately calls the *grandparent*'s __init__
        # (super(NetCDF4DataStore, ...), not super(_NetCDF4DataStore, ...)),
        # skipping NetCDF4DataStore.__init__ which would re-open the file.
        super(NetCDF4DataStore, self).__init__(writer)
def open_xr_dataset(file):
    """Thin wrapper around xarray's open_dataset.

    This is needed because variables often have not enough georef attrs
    to be understood alone, and datasets tend to loose their attrs with
    operations...

    Parameters
    ----------
    file : str
        path to a netCDF file or a geotiff

    Returns
    -------
    xr.Dataset
    """

    # if geotiff, use Salem
    p, ext = os.path.splitext(file)
    if (ext.lower() == '.tif') or (ext.lower() == '.tiff'):
        from salem import GeoTiff
        geo = GeoTiff(file)
        # TODO: currently everything is loaded in memory (baaad)
        da = xr.DataArray(geo.get_vardata(),
                          coords={'x': geo.grid.x_coord,
                                  'y': geo.grid.y_coord},
                          dims=['y', 'x'])
        ds = xr.Dataset()
        ds.attrs['pyproj_srs'] = geo.grid.proj.srs
        ds['data'] = da
        ds['data'].attrs['pyproj_srs'] = geo.grid.proj.srs
        return ds

    # otherwise rely on xarray
    ds = xr.open_dataset(file)

    # did we get it? If not no need to go further
    try:
        grid = ds.salem.grid
    except Exception:
        # accessor init can fail in several ways (grid not understood,
        # missing dims, ...): this is a deliberate best-effort fallback,
        # but not a bare except anymore
        warnings.warn('File not recognised as Salem grid. Fall back to xarray',
                      RuntimeWarning)
        return ds

    # add cartesian coords for WRF
    if 'west_east' in ds.variables:
        ds['west_east'] = ds.salem.grid.x_coord
        ds['south_north'] = ds.salem.grid.y_coord

    # add pyproj string everywhere
    ds.attrs['pyproj_srs'] = grid.proj.srs
    for v in ds.data_vars:
        ds[v].attrs['pyproj_srs'] = grid.proj.srs
    return ds
def open_wrf_dataset(file):
    """Wrapper around xarray's open_dataset to make WRF files a bit better.

    This is needed because variables often have not enough georef attrs
    to be understood alone, and datasets tend to loose their attrs with
    operations...

    Parameters
    ----------
    file : str
        path to the WRF netCDF file

    Returns
    -------
    xr.Dataset
    """

    nc = netCDF4.Dataset(file)

    # Change staggered variables to unstaggered ones
    for vn, v in nc.variables.items():
        if wrftools.Unstaggerer.can_do(v):
            nc.variables[vn] = wrftools.Unstaggerer(v)

    # Check if we can add diagnostic variables to the pot
    for vn in wrftools.var_classes:
        cl = getattr(wrftools, vn)
        if cl.can_do(nc):
            nc.variables[vn] = cl(nc)

    # trick xarray with our custom netcdf
    ds = xr.open_dataset(_NetCDF4DataStore(file, ds=nc))

    # remove time dimension to lon lat
    for vn in ['XLONG', 'XLAT']:
        try:
            v = ds[vn].isel(Time=0)
            ds[vn] = xr.DataArray(v.values, dims=['south_north', 'west_east'])
        except (ValueError, KeyError):
            # KeyError added: the variable may be absent from the file
            pass

    # Convert time
    ds['Time'] = netcdf_time(ds)
    ds.rename({'Time': 'time', 'XLAT': 'lat', 'XLONG': 'lon'},
              inplace=True)
    if 'XTIME' in ds.variables:
        ds.rename({'XTIME': 'xtime'}, inplace=True)

    # drop ugly vars
    vns = ['Times', 'XLAT_V', 'XLAT_U', 'XLONG_U', 'XLONG_V']
    vns = [vn for vn in vns if vn in ds.variables]
    ds = ds.drop(vns)

    # add cartesian coords
    ds['west_east'] = ds.salem.grid.x_coord
    ds['south_north'] = ds.salem.grid.y_coord

    # add pyproj string everywhere
    ds.attrs['pyproj_srs'] = ds.salem.grid.proj.srs
    for v in ds.data_vars:
        ds[v].attrs['pyproj_srs'] = ds.salem.grid.proj.srs

    return ds