Source code for salem.sio

"""
Input output functions (but mostly input)
"""
from __future__ import division

import os
import pickle
import warnings
from datetime import datetime

import numpy as np
import netCDF4

from salem.utils import memory, cached_shapefile_path
from salem import gis, utils, wgs84, wrftools, proj_to_cartopy

try:
    import xarray as xr
    from xarray.backends.netCDF4_ import NetCDF4DataStore, close_on_error, \
        _nc4_group
    has_xarray = True
except ImportError:
    has_xarray = False
    # dummy replacements so that the module still imports without xarray
    class xr(object):
        pass
    class NullDecl(object):
        def __init__(self, dummy):
            pass
        def __call__(self, func):
            return func
    xr.register_dataset_accessor = NullDecl
    xr.register_dataarray_accessor = NullDecl
    NetCDF4DataStore = object


def read_shapefile(fpath, cached=False):
    """Reads a shapefile using geopandas.

    For convenience, it adds four columns to the dataframe:
    [min_x, max_x, min_y, max_y]

    Because reading a shapefile can take a long time, Salem provides a
    caching utility (cached=True). This will save a pickle of the shapefile
    in the cache directory.
    """

    import geopandas as gpd

    _, ext = os.path.splitext(fpath)

    if ext.lower() in ['.shp', '.p']:
        if cached:
            cpath = cached_shapefile_path(fpath)
            # unpickle if cached, read and pickle if not
            if os.path.exists(cpath):
                with open(cpath, 'rb') as f:
                    out = pickle.load(f)
            else:
                out = read_shapefile(fpath, cached=False)
                with open(cpath, 'wb') as f:
                    pickle.dump(out, f)
        else:
            out = gpd.read_file(fpath)
            out['min_x'] = [g.bounds[0] for g in out.geometry]
            out['max_x'] = [g.bounds[2] for g in out.geometry]
            out['min_y'] = [g.bounds[1] for g in out.geometry]
            out['max_y'] = [g.bounds[3] for g in out.geometry]
    else:
        raise ValueError('File extension not recognised: {}'.format(ext))

    return out
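
# A usage sketch added for illustration (the shapefile path is hypothetical).
# The extra bounds columns make cheap spatial pre-filtering possible:
#
#     >>> gdf = read_shapefile('countries.shp', cached=True)
#     >>> in_east = gdf[(gdf.min_x > 0) & (gdf.max_x < 90)]
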

@memory.cache(ignore=['grid'])
def _memory_transform(shape_cpath, grid=None, grid_str=None):
    """Quick solution using joblib so that the same shape is not transformed
    many times (useful for maps). Since grid is a complex object, joblib
    seemed to have trouble with it, so the cache is checked against grid_str
    while the actual job is done with grid.
    """
    shape = read_shapefile(shape_cpath, cached=True)
    e = grid.extent_in_crs(crs=shape.crs)
    p = np.nonzero(~((shape['min_x'] > e[1]) |
                     (shape['max_x'] < e[0]) |
                     (shape['min_y'] > e[3]) |
                     (shape['max_y'] < e[2])))
    shape = shape.iloc[p]
    shape = gis.transform_geopandas(shape, to_crs=grid, inplace=True)
    return shape

def read_shapefile_to_grid(fpath, grid):
    """Same as read_shapefile but directly transformed to a grid.

    The whole thing is cached so that the second call will be much faster.

    Parameters
    ----------
    fpath: path to the file
    grid: the arrival grid
    """

    # ensure it is a cached pickle (copy code smell)
    shape_cpath = cached_shapefile_path(fpath)
    if not os.path.exists(shape_cpath):
        out = read_shapefile(fpath, cached=False)
        with open(shape_cpath, 'wb') as f:
            pickle.dump(out, f)

    return _memory_transform(shape_cpath, grid=grid, grid_str=str(grid))
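
# Illustrative sketch (hypothetical paths; any salem.Grid works as target,
# e.g. the grid of an already opened dataset):
#
#     >>> ds = open_xr_dataset('model_output.nc')
#     >>> rivers = read_shapefile_to_grid('rivers.shp', grid=ds.salem.grid)
#     >>> # geometries are now expressed in the grid's coordinates
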

# TODO: remove this once we are sure that we have all WRF files right
tmp_check_wrf = True


def _wrf_grid_from_dataset(ds):
    """Get the WRF projection out of the file."""

    pargs = dict()
    if hasattr(ds, 'PROJ_ENVI_STRING'):
        # HAR and other TU Berlin files
        dx = ds.GRID_DX
        dy = ds.GRID_DY
        pargs['lat_1'] = ds.PROJ_STANDARD_PAR1
        pargs['lat_2'] = ds.PROJ_STANDARD_PAR2
        pargs['lat_0'] = ds.PROJ_CENTRAL_LAT
        pargs['lon_0'] = ds.PROJ_CENTRAL_LON
        pargs['center_lon'] = ds.PROJ_CENTRAL_LON
        if ds.PROJ_NAME == 'Lambert Conformal Conic':
            proj_id = 1
        else:
            proj_id = 99  # pragma: no cover
    else:
        # Normal WRF file
        cen_lon = ds.CEN_LON
        cen_lat = ds.CEN_LAT
        dx = ds.DX
        dy = ds.DY
        pargs['lat_1'] = ds.TRUELAT1
        pargs['lat_2'] = ds.TRUELAT2
        pargs['lat_0'] = ds.MOAD_CEN_LAT
        pargs['lon_0'] = ds.STAND_LON
        pargs['center_lon'] = ds.CEN_LON
        proj_id = ds.MAP_PROJ

    if proj_id == 1:
        # Lambert
        p4 = '+proj=lcc +lat_1={lat_1} +lat_2={lat_2} ' \
             '+lat_0={lat_0} +lon_0={lon_0} ' \
             '+x_0=0 +y_0=0 +a=6370000 +b=6370000'
        p4 = p4.format(**pargs)
    elif proj_id == 3:
        # Mercator
        p4 = '+proj=merc +lat_ts={lat_1} ' \
             '+lon_0={center_lon} ' \
             '+x_0=0 +y_0=0 +a=6370000 +b=6370000'
        p4 = p4.format(**pargs)
    else:
        raise NotImplementedError('WRF proj not implemented yet: '
                                  '{}'.format(proj_id))

    proj = gis.check_crs(p4)
    if proj is None:
        raise RuntimeError('WRF proj not understood: {}'.format(p4))

    # Here we have to accept xarray and netCDF4 datasets
    try:
        nx = len(ds.dimensions['west_east'])
        ny = len(ds.dimensions['south_north'])
    except AttributeError:
        # maybe an xarray dataset
        nx = ds.dims['west_east']
        ny = ds.dims['south_north']
    if hasattr(ds, 'PROJ_ENVI_STRING'):
        # HAR
        x0 = ds.GRID_X00
        y0 = ds.GRID_Y00
    else:
        # Normal WRF file
        e, n = gis.transform_proj(wgs84, proj, cen_lon, cen_lat)
        x0 = -(nx-1) / 2. * dx + e  # DL corner
        y0 = -(ny-1) / 2. * dy + n  # DL corner
    grid = gis.Grid(nxny=(nx, ny), ll_corner=(x0, y0), dxdy=(dx, dy),
                    proj=proj)

    if tmp_check_wrf:
        # Temporary asserts
        if 'XLONG' in ds.variables:
            # Normal WRF
            mylon, mylat = grid.ll_coordinates
            reflon = ds.variables['XLONG']
            reflat = ds.variables['XLAT']
            if len(reflon.shape) == 3:
                reflon = reflon[0, :, :]
                reflat = reflat[0, :, :]
            assert np.allclose(reflon, mylon, atol=1e-4)
            assert np.allclose(reflat, mylat, atol=1e-4)
        if 'lon' in ds.variables:
            # HAR
            mylon, mylat = grid.ll_coordinates
            reflon = ds.variables['lon']
            reflat = ds.variables['lat']
            if len(reflon.shape) == 3:
                reflon = reflon[0, :, :]
                reflat = reflat[0, :, :]
            assert np.allclose(reflon, mylon, atol=1e-4)
            assert np.allclose(reflat, mylat, atol=1e-4)

    return grid


def _lonlat_grid_from_dataset(ds):
    """Look for longitude and latitude coordinates."""

    # Do we have some standard names as variables?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
    if (x is None) or (y is None):
        return None

    # OK, get it
    lon = ds.variables[x][:]
    lat = ds.variables[y][:]

    # double check for dubious variables
    if not utils.str_in_list([x], utils.valid_names['lon_var']) or \
            not utils.str_in_list([y], utils.valid_names['lat_var']):
        # name not usual: see if at least the range follows some conventions
        if (np.max(np.abs(lon)) > 360.1) or (np.max(np.abs(lat)) > 90.1):
            return None

    # Make the grid
    dx = lon[1]-lon[0]
    dy = lat[1]-lat[0]
    args = dict(nxny=(lon.shape[0], lat.shape[0]), proj=wgs84, dxdy=(dx, dy))
    args['corner'] = (lon[0], lat[0])
    return gis.Grid(**args)


def _salem_grid_from_dataset(ds):
    """Look for coordinates that Salem might have created.

    Current convention: x_coord, y_coord, pyproj_srs as attribute
    """

    # Projection
    try:
        proj = ds.pyproj_srs
    except AttributeError:
        proj = None
    proj = gis.check_crs(proj)
    if proj is None:
        return None

    # Do we have some standard names as variables?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
    if (x is None) or (y is None):
        return None

    # OK, get it
    x = ds.variables[x][:]
    y = ds.variables[y][:]

    # Make the grid
    dx = x[1]-x[0]
    dy = y[1]-y[0]
    args = dict(nxny=(x.shape[0], y.shape[0]), proj=proj, dxdy=(dx, dy))
    args['corner'] = (x[0], y[0])
    return gis.Grid(**args)


def grid_from_dataset(ds):
    """Find out if the dataset contains enough info for Salem to understand.

    ds can be an xarray dataset or a NetCDF dataset, or anything that
    resembles it.

    Returns a :py:class:`~salem.Grid` if successful, None otherwise.
    """

    # try if it is a salem file
    out = _salem_grid_from_dataset(ds)
    if out is not None:
        return out

    # maybe it's a WRF file?
    if hasattr(ds, 'MOAD_CEN_LAT') or hasattr(ds, 'PROJ_ENVI_STRING'):
        # WRF and HAR have some special attributes
        return _wrf_grid_from_dataset(ds)

    # Try out plate carree
    return _lonlat_grid_from_dataset(ds)


def netcdf_time(ncobj, monthbegin=False):
    """Check if the netcdf file contains a time that Salem understands."""

    import pandas as pd

    time = None
    try:
        vt = utils.str_in_list(ncobj.variables.keys(),
                               utils.valid_names['time_var'])[0]
    except IndexError:
        # no time variable
        return None

    if hasattr(ncobj, 'TITLE') and 'GEOGRID' in ncobj.TITLE:
        # geogrid file
        pass
    elif ncobj[vt].dtype in ['|S1', '|S19']:
        # WRF file
        time = []
        try:
            stimes = ncobj.variables['Times'][:].values
        except AttributeError:
            stimes = ncobj.variables['Times'][:]
        for t in stimes:
            time.append(pd.to_datetime(t.tostring().decode(),
                                       errors='raise',
                                       format='%Y-%m-%d_%H:%M:%S'))
    elif vt is not None:
        # CF time
        var = ncobj.variables[vt]
        time = netCDF4.num2date(var[:], var.units)

        if monthbegin:
            # sometimes monthly data is centered in the month (stupid)
            time = [datetime(t.year, t.month, 1) for t in time]

    return time
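
# Minimal sketch of the dispatch above (hypothetical file name; works with
# netCDF4 as well as xarray datasets):
#
#     >>> nc = netCDF4.Dataset('wrfout_d01.nc')
#     >>> grid = grid_from_dataset(nc)  # a salem.Grid, or None
#     >>> if grid is not None:
#     ...     lon, lat = grid.ll_coordinates
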

class _XarrayAccessorBase(object):
    """Common logic for both data structures (DataArray and Dataset).

    http://xarray.pydata.org/en/stable/internals.html#extending-xarray
    """

    def __init__(self, xarray_obj):

        self._obj = xarray_obj

        if isinstance(xarray_obj, xr.DataArray):
            xarray_obj = xarray_obj.to_dataset(name='var')
            try:
                # maybe there was already some georef
                xarray_obj.attrs['pyproj_srs'] = xarray_obj['var'].pyproj_srs
            except AttributeError:
                pass

        self.grid = grid_from_dataset(xarray_obj)
        if self.grid is None:
            raise RuntimeError('dataset Grid not understood.')

        dn = xarray_obj.dims.keys()
        self.x_dim = utils.str_in_list(dn, utils.valid_names['x_dim'])[0]
        self.y_dim = utils.str_in_list(dn, utils.valid_names['y_dim'])[0]
        dim = utils.str_in_list(dn, utils.valid_names['t_dim'])
        self.t_dim = dim[0] if dim else None
        dim = utils.str_in_list(dn, utils.valid_names['z_dim'])
        self.z_dim = dim[0] if dim else None

    def subset(self, margin=0, ds=None, **kwargs):
        """subset(self, margin=0, shape=None, geometry=None, grid=None,
                  corners=None, crs=wgs84, roi=None)

        Get a subset of the dataset.

        Accepts all keywords of :py:func:`~Grid.roi`

        Parameters
        ----------
        ds : Dataset or DataArray
            form the ROI from the extent of the Dataset or DataArray
        shape : str
            path to a shapefile
        geometry : geometry
            a shapely geometry (don't forget the crs keyword)
        grid : Grid
            a Grid object whose extent will form the ROI
        corners : tuple
            a ((x0, y0), (x1, y1)) tuple of the corners of the square
            to subset the dataset to (don't forget the crs keyword)
        crs : crs, default wgs84
            coordinate reference system of the geometry and corners
        roi : ndarray
            a mask for the region of interest to subset the dataset onto
        margin : int
            add a margin to the region to subset (can be negative!). Can be
            used as a single keyword, too: subset(margin=-5) will remove
            five pixels from each boundary of the dataset.
        """

        if ds is not None:
            grid = ds.salem.grid
            kwargs.setdefault('grid', grid)

        mask = self.grid.region_of_interest(**kwargs)
        if not kwargs:
            # user just wants a margin
            mask[:] = 1

        ids = np.nonzero(mask)
        sub_x = [np.min(ids[1]) - margin, np.max(ids[1]) + margin]
        sub_y = [np.min(ids[0]) - margin, np.max(ids[0]) + margin]

        out_ds = self._obj[{self.x_dim: slice(sub_x[0], sub_x[1]+1),
                            self.y_dim: slice(sub_y[0], sub_y[1]+1)}]
        return out_ds

    def roi(self, ds=None, **kwargs):
        """roi(self, shape=None, geometry=None, grid=None, corners=None,
               crs=wgs84, roi=None)

        Make a region of interest (ROI) for the dataset.

        All grid points outside the ROI will be masked out.

        Parameters
        ----------
        ds : Dataset or DataArray
            form the ROI from the extent of the Dataset or DataArray
        shape : str
            path to a shapefile
        geometry : geometry
            a shapely geometry (don't forget the crs keyword)
        grid : Grid
            a Grid object whose extent will form the ROI
        corners : tuple
            a ((x0, y0), (x1, y1)) tuple of the corners of the square
            to subset the dataset to (don't forget the crs keyword)
        crs : crs, default wgs84
            coordinate reference system of the geometry and corners
        roi : ndarray
            if you have a mask ready, you can give it here
        """

        if ds is not None:
            grid = ds.salem.grid
            kwargs.setdefault('grid', grid)

        mask = self.grid.region_of_interest(**kwargs)
        coords = {self.y_dim: self._obj[self.y_dim].values,
                  self.x_dim: self._obj[self.x_dim].values}
        mask = xr.DataArray(mask, coords=coords,
                            dims=(self.y_dim, self.x_dim))

        out = self._obj.where(mask)

        # keep attrs
        out.attrs = self._obj.attrs
        if isinstance(out, xr.DataArray):
            out.name = self._obj.name

        # add pyproj string everywhere
        out.attrs['pyproj_srs'] = self.grid.proj.srs
        if isinstance(out, xr.Dataset):
            for v in out.variables:
                out.variables[v].attrs = self._obj.variables[v].attrs
                out.variables[v].attrs['pyproj_srs'] = self.grid.proj.srs
        return out

    def get_map(self, **kwargs):
        """Make a salem.Map out of the dataset.

        All keywords are passed to :py:class:`salem.Map`
        """

        from salem.graphics import Map
        return Map(self.grid, **kwargs)

    def _quick_map(self, obj, ax=None, interp='nearest', **kwargs):
        """Make a plot of a data array."""

        # some metadata?
        title = obj.name or ''
        if obj._title_for_slice():
            title += ' (' + obj._title_for_slice() + ')'
        cb = obj.attrs['units'] if 'units' in obj.attrs else ''

        smap = self.get_map(**kwargs)
        smap.set_data(obj.values, interp=interp)
        smap.visualize(ax=ax, title=title, cbar_title=cb)
        return smap

    def cartopy(self):
        """Get a cartopy.crs.Projection for this dataset."""
        return proj_to_cartopy(self.grid.proj)

    def transform(self, other, grid=None, interp='nearest', ks=3):
        """Reprojects another Dataset or DataArray onto this grid.

        Parameters
        ----------
        other : Dataset, DataArray or ndarray
            the data to project onto self
        grid : salem.Grid
            in case the input dataset does not carry georef info
        interp : str
            'nearest' (default), 'linear', or 'spline'
        ks : int
            Degree of the bivariate spline. Default is 3.

        Returns
        -------
        a dataset or a dataarray
        """

        was_dataarray = False
        if not isinstance(other, xr.Dataset):
            try:
                other = other.to_dataset(name=other.name)
                was_dataarray = True
            except AttributeError:
                # must be a ndarray
                rdata = self.grid.map_gridded_data(other, grid=grid,
                                                   interp=interp, ks=ks)
                # let's guess
                sh = rdata.shape
                nd = len(sh)
                if nd == 2:
                    dims = (self.y_dim, self.x_dim)
                elif nd == 3:
                    newdim = 'new_dim'
                    if self.t_dim and sh[0] == self._obj.dims[self.t_dim]:
                        newdim = self.t_dim
                    if self.z_dim and sh[0] == self._obj.dims[self.z_dim]:
                        newdim = self.z_dim
                    dims = (newdim, self.y_dim, self.x_dim)
                else:
                    raise NotImplementedError('more than 3 dims not ok yet.')

                coords = {}
                for d in dims:
                    if d in self._obj:
                        coords[d] = self._obj[d]

                out = xr.DataArray(rdata, coords=coords, dims=dims)
                out.attrs['pyproj_srs'] = self.grid.proj.srs
                return out

        # go
        out = xr.Dataset()
        for v in other.data_vars:
            var = other[v]
            rdata = self.grid.map_gridded_data(var, interp=interp, ks=ks)

            # remove old coords
            dims = [d for d in var.dims]
            coords = {}
            for c in var.coords:
                n = utils.str_in_list([c], utils.valid_names['x_dim'])
                if n:
                    dims = [self.x_dim if x in n else x for x in dims]
                    continue
                n = utils.str_in_list([c], utils.valid_names['y_dim'])
                if n:
                    dims = [self.y_dim if x in n else x for x in dims]
                    continue
                coords[c] = var.coords[c]
            # add new ones
            coords[self.x_dim] = self._obj[self.x_dim]
            coords[self.y_dim] = self._obj[self.y_dim]

            rdata = xr.DataArray(rdata, coords=coords, attrs=var.attrs,
                                 dims=dims)
            rdata.attrs['pyproj_srs'] = self.grid.proj.srs
            out[v] = rdata

        if was_dataarray:
            out = out[v]
        else:
            out.attrs['pyproj_srs'] = self.grid.proj.srs

        return out
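
# A sketch of the accessor workflow implemented above (hypothetical file and
# shapefile names; the 'salem' accessor is registered on DataArrays and
# Datasets just below):
#
#     >>> ds = open_xr_dataset('t2m.nc')
#     >>> sub = ds.salem.subset(corners=((5., 40.), (15., 50.)), crs=wgs84)
#     >>> alps = sub.salem.roi(shape='alps.shp')    # mask outside the ROI
#     >>> other = open_xr_dataset('precip_other_grid.nc')
#     >>> on_ds = ds.salem.transform(other, interp='linear')
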

@xr.register_dataarray_accessor('salem')
class DataArrayAccessor(_XarrayAccessorBase):

    def quick_map(self, ax=None, interp='nearest', **kwargs):
        """Make a plot of the DataArray."""
        return self._quick_map(self._obj, ax=ax, interp=interp, **kwargs)

    def interpz(self, zcoord, levels, dim_name='', use_multiprocessing=True):
        """Interpolates the array along the vertical dimension

        Parameters
        ----------
        zcoord: DataArray
            the z coordinates of the variable. Must have the same dimensions
            as the variable.
        levels: 1dArray
            the levels at which to interpolate
        dim_name: str
            the name of the new dimension
        use_multiprocessing: bool
            set to false if, for some reason, you don't want to use mp

        Returns
        -------
        a new DataArray with the interpolated data
        """

        if self.z_dim is None:
            raise RuntimeError('z dimension not recognized')

        data = wrftools.interp3d(self._obj.values, zcoord.values,
                                 np.atleast_1d(levels),
                                 use_multiprocessing=use_multiprocessing)

        dims = list(self._obj.dims)
        zd = np.nonzero([self.z_dim == d for d in dims])[0][0]
        dims[zd] = dim_name
        coords = dict(self._obj.coords)
        coords.pop(self.z_dim, None)
        coords[dim_name] = np.atleast_1d(levels)
        out = xr.DataArray(data, name=self._obj.name, dims=dims,
                           coords=coords)
        out.attrs['pyproj_srs'] = self.grid.proj.srs
        if not np.asarray(levels).shape:
            out = out.isel(**{dim_name: 0})
        return out
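
# Hedged usage sketch for interpz (assumes a WRF-like dataset where 'Z' is a
# height coordinate with the same dimensions as the interpolated variable):
#
#     >>> wds = open_wrf_dataset('wrfout_d01.nc')
#     >>> t_z = wds['T'].salem.interpz(wds['Z'], [500., 1000.], dim_name='z')
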

@xr.register_dataset_accessor('salem')
class DatasetAccessor(_XarrayAccessorBase):

    def quick_map(self, varname, ax=None, interp='nearest', **kwargs):
        """Make a plot of a variable of the Dataset."""
        return self._quick_map(self._obj[varname], ax=ax, interp=interp,
                               **kwargs)

    def transform_and_add(self, other, grid=None, interp='nearest', ks=3,
                          name=None):
        """Reprojects another Dataset and adds its content to the current one.

        Parameters
        ----------
        other : Dataset, DataArray or ndarray
            the data to project onto self
        grid : salem.Grid
            in case the input dataset does not carry georef info
        interp : str
            'nearest' (default), 'linear', or 'spline'
        ks : int
            Degree of the bivariate spline. Default is 3.
        name : str or dict-like
            how to name the new variables in self. Per default the new
            variables are going to keep their name (it will raise an error
            in case of conflict). Set to a str to rename the variable (if
            unique) or set to a dict for mapping the old names to the new
            names for datasets.
        """

        out = self.transform(other, grid=grid, interp=interp, ks=ks)

        if isinstance(out, xr.DataArray):
            new_name = name or out.name
            if new_name is None:
                raise ValueError('You need to set name')
            self._obj[new_name] = out
        else:
            for v in out.data_vars:
                try:
                    new_name = name[v]
                except (KeyError, TypeError):
                    new_name = v
                self._obj[new_name] = out[v]

    def wrf_zlevel(self, varname, levels=None, use_multiprocessing=True):
        """Interpolates to a specified height above sea level.

        Parameters
        ----------
        varname: str
            the name of the variable to interpolate
        levels: 1d array
            levels at which to interpolate (default: some levels I thought of)
        use_multiprocessing: bool
            set to false if, for some reason, you don't want to use mp

        Returns
        -------
        an interpolated DataArray
        """
        if levels is None:
            levels = np.array([10, 20, 30, 50, 75, 100, 200, 300, 500, 750,
                               1000, 2000, 3000, 5000, 7500, 10000])

        zcoord = self._obj['Z']
        out = self._obj[varname].salem.interpz(
            zcoord, levels, dim_name='z',
            use_multiprocessing=use_multiprocessing)
        out['z'].attrs['description'] = 'height above sea level'
        out['z'].attrs['units'] = 'm'
        return out

    def wrf_plevel(self, varname, levels=None, use_multiprocessing=True):
        """Interpolates to a specified pressure level (hPa).

        Parameters
        ----------
        varname: str
            the name of the variable to interpolate
        levels: 1d array
            levels at which to interpolate (default: some levels I thought of)
        use_multiprocessing: bool
            set to false if, for some reason, you don't want to use mp

        Returns
        -------
        an interpolated DataArray
        """
        if levels is None:
            levels = np.array([1000, 975, 950, 925, 900, 850, 800, 750, 700,
                               650, 600, 550, 500, 450, 400, 300, 200, 100])

        zcoord = self._obj['PRESSURE']
        out = self._obj[varname].salem.interpz(
            zcoord, levels, dim_name='p',
            use_multiprocessing=use_multiprocessing)
        out['p'].attrs['description'] = 'pressure'
        out['p'].attrs['units'] = 'hPa'
        return out
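
# Usage sketch for the two WRF helpers above (hypothetical file name; 'WS'
# and 'T' stand in for any 4D variable present in the opened dataset):
#
#     >>> wds = open_wrf_dataset('wrfout_d01.nc')
#     >>> ws_h = wds.salem.wrf_zlevel('WS', levels=[100., 500.])   # m a.s.l.
#     >>> t_p = wds.salem.wrf_plevel('T', levels=[850., 500.])     # hPa
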

class _NetCDF4DataStore(NetCDF4DataStore):
    """Just another way to init xarray's datastore."""

    def __init__(self, filename, mode='r', format='NETCDF4', group=None,
                 writer=None, clobber=True, diskless=False, persist=False,
                 ds=None):
        import netCDF4 as nc4
        if format is None:
            format = 'NETCDF4'
        if ds is None:
            ds = nc4.Dataset(filename, mode=mode, clobber=clobber,
                             diskless=diskless, persist=persist,
                             format=format)
        with close_on_error(ds):
            self.ds = _nc4_group(ds, group, mode)
        self.format = format
        self.is_remote = False
        self._filename = filename
        super(NetCDF4DataStore, self).__init__(writer)

def open_xr_dataset(file):
    """Thin wrapper around xarray's open_dataset.

    This is needed because variables often do not have enough georef attrs
    to be understood alone, and datasets tend to lose their attrs with
    operations...

    Returns
    -------
    xr.Dataset
    """

    # if geotiff, use Salem
    p, ext = os.path.splitext(file)
    if (ext.lower() == '.tif') or (ext.lower() == '.tiff'):
        from salem import GeoTiff
        geo = GeoTiff(file)
        # TODO: currently everything is loaded in memory (baaad)
        da = xr.DataArray(geo.get_vardata(),
                          coords={'x': geo.grid.x_coord,
                                  'y': geo.grid.y_coord},
                          dims=['y', 'x'])
        ds = xr.Dataset()
        ds.attrs['pyproj_srs'] = geo.grid.proj.srs
        ds['data'] = da
        ds['data'].attrs['pyproj_srs'] = geo.grid.proj.srs
        return ds

    # otherwise rely on xarray
    ds = xr.open_dataset(file)

    # did we get it? If not, no need to go further
    try:
        grid = ds.salem.grid
    except RuntimeError:
        warnings.warn('File not recognised as Salem grid. Fall back to '
                      'xarray', RuntimeWarning)
        return ds

    # add cartesian coords for WRF
    if 'west_east' in ds.variables:
        ds['west_east'] = ds.salem.grid.x_coord
        ds['south_north'] = ds.salem.grid.y_coord

    # add pyproj string everywhere
    ds.attrs['pyproj_srs'] = grid.proj.srs
    for v in ds.data_vars:
        ds[v].attrs['pyproj_srs'] = grid.proj.srs

    return ds
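
# Minimal sketch (hypothetical paths): GeoTiff and netCDF files go through
# the same entry point, and the georeferencing survives in 'pyproj_srs':
#
#     >>> dem = open_xr_dataset('srtm.tif')   # Dataset with a 'data' var
#     >>> ds = open_xr_dataset('fields.nc')
#     >>> ds.attrs['pyproj_srs']              # e.g. '+proj=longlat ...'
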

def open_wrf_dataset(file):
    """Wrapper around xarray's open_dataset to make WRF files a bit better.

    This is needed because variables often do not have enough georef attrs
    to be understood alone, and datasets tend to lose their attrs with
    operations...

    Returns
    -------
    xr.Dataset
    """

    nc = netCDF4.Dataset(file)

    # Change staggered variables to unstaggered ones
    for vn, v in nc.variables.items():
        if wrftools.Unstaggerer.can_do(v):
            nc.variables[vn] = wrftools.Unstaggerer(v)

    # Check if we can add diagnostic variables to the pot
    for vn in wrftools.var_classes:
        cl = getattr(wrftools, vn)
        if cl.can_do(nc):
            nc.variables[vn] = cl(nc)

    # trick xarray with our custom netcdf
    ds = xr.open_dataset(_NetCDF4DataStore(file, ds=nc))

    # remove the time dimension from lon and lat
    for vn in ['XLONG', 'XLAT']:
        try:
            v = ds[vn].isel(Time=0)
            ds[vn] = xr.DataArray(v.values, dims=['south_north', 'west_east'])
        except ValueError:
            pass

    # Convert time
    ds['Time'] = netcdf_time(ds)
    ds.rename({'Time': 'time', 'XLAT': 'lat', 'XLONG': 'lon'}, inplace=True)
    if 'XTIME' in ds.variables:
        ds.rename({'XTIME': 'xtime'}, inplace=True)

    # drop ugly vars
    vns = ['Times', 'XLAT_V', 'XLAT_U', 'XLONG_U', 'XLONG_V']
    vns = [vn for vn in vns if vn in ds.variables]
    ds = ds.drop(vns)

    # add cartesian coords
    ds['west_east'] = ds.salem.grid.x_coord
    ds['south_north'] = ds.salem.grid.y_coord

    # add pyproj string everywhere
    ds.attrs['pyproj_srs'] = ds.salem.grid.proj.srs
    for v in ds.data_vars:
        ds[v].attrs['pyproj_srs'] = ds.salem.grid.proj.srs

    return ds
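
# Hedged sketch (hypothetical path). Compared to plain xarray, the returned
# dataset has unstaggered and diagnostic variables, a decoded 'time'
# coordinate, and cartesian 'west_east'/'south_north' coordinates:
#
#     >>> wds = open_wrf_dataset('wrfout_d01_2008-10-26.nc')
#     >>> wds.time.values[0]            # decoded from the 'Times' strings
#     >>> wds['T2'].salem.quick_map()   # georeferencing is already attached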