"""
Some useful functions
"""
from __future__ import division
import io
import json
import os
import shutil
import time
import zipfile
from collections import OrderedDict
import numpy as np
from joblib import Memory
from salem import cache_dir, download_dir, python_version
from six.moves.urllib.error import HTTPError, URLError
from six.moves.urllib.request import urlretrieve, urlopen
# Joblib
def _joblib_cache_dir():
    """Get the path to the right joblib directory.

    We need to make sure that cached files correspond to the same
    environment. To this end we make a unique directory hash, depending on the
    version and location of several packages we thought are important
    (because they change often, or because conda versions give different
    results than pip versions).

    Returns
    -------
    path to the dir
    """
    import hashlib
    import importlib

    out = OrderedDict(python_version=python_version)
    # These packages change often, or their conda builds differ from pip's.
    # The original code repeated the same try/import block six times; a loop
    # over importlib.import_module is equivalent and keeps the same key order.
    for modname in ['shapely', 'fiona', 'geopandas', 'osgeo', 'pyproj',
                    'salem']:
        try:
            mod = importlib.import_module(modname)
        except ImportError:
            continue
        out[modname + '_version'] = mod.__version__
        out[modname + '_file'] = mod.__file__

    # ok, now make a dummy str that we will hash
    strout = ''.join(k + v for k, v in out.items())
    strout = 'salem_hash_' + hashlib.md5(strout.encode()).hexdigest()
    dirout = os.path.join(cache_dir, 'joblib', strout)
    if not os.path.exists(dirout):
        os.makedirs(dirout)
    return dirout
# Shared joblib Memory object, caching into an environment-specific directory
memory = Memory(cachedir=_joblib_cache_dir(), verbose=0)

# A series of variables and dimension names that Salem will understand
valid_names = {
    'x_dim': ['west_east', 'lon', 'longitude', 'longitudes', 'lons',
              'xlong', 'xlong_m', 'dimlon', 'x', 'lon_3', 'long',
              'phony_dim_0', 'eastings', 'easting'],
    'y_dim': ['south_north', 'lat', 'latitude', 'latitudes', 'lats',
              'xlat', 'xlat_m', 'dimlat', 'y', 'lat_3', 'phony_dim_1',
              'northings', 'northing'],
    'z_dim': ['levelist', 'level', 'pressure', 'press', 'zlevel', 'z',
              'bottom_top'],
    't_dim': ['time', 'times', 'xtime'],
    'lon_var': ['lon', 'longitude', 'longitudes', 'lons', 'long'],
    'lat_var': ['lat', 'latitude', 'latitudes', 'lats'],
    'time_var': ['time', 'times'],
}

# GitHub repository holding the demo data, and the Natural Earth image base
sample_data_gh_repo = 'fmaussion/salem-sample-data'
nearth_base = 'http://naturalearth.springercarto.com/ne3_data/'
def str_in_list(l1, l2):
    """Check if one element of l1 is in l2 and if yes, returns the name of
    that element in a list (could be more than one).

    Note that the candidates from ``l1`` are lowercased before the lookup,
    while ``l2`` is compared as-is.

    Examples
    --------
    >>> print(str_in_list(['time', 'lon'], ['temp','time','prcp']))
    ['time']
    >>> print(str_in_list(['time', 'lon'], ['temp','time','prcp','lon']))
    ['time', 'lon']
    """
    found = []
    for candidate in l1:
        if candidate.lower() in l2:
            found.append(candidate)
    return found
def empty_cache():
    """Empty salem's cache directory."""
    # wipe the whole tree (if any), then recreate it empty
    cache_there = os.path.exists(cache_dir)
    if cache_there:
        shutil.rmtree(cache_dir)
    os.makedirs(cache_dir)
def cached_shapefile_path(fpath):
    """Checks if a shapefile is cached and returns the corresponding path.

    This function checks for the last time the file has changed,
    so it should be safe to use.
    """
    stem, ext = os.path.splitext(fpath)
    low_ext = ext.lower()
    if low_ext == '.p':
        # No need to recache pickled files (this is for nested calls)
        return fpath
    if low_ext != '.shp':
        raise ValueError('File extension not recognised: {}'.format(ext))

    # Cache directory mirrors the source file location under cache_dir
    shared = os.path.commonprefix([cache_dir, stem])
    cache_sub = os.path.join(cache_dir, python_version + '_cache',
                             os.path.relpath(stem, shared))
    # File name encodes the source's mtime (ms) so staleness is detectable
    stamp = '{:d}'.format(int(round(os.path.getmtime(fpath) * 1000.)))
    target = os.path.join(cache_sub, stamp + '.p')

    if not os.path.exists(cache_sub):
        os.makedirs(cache_sub)
        return target
    if os.path.exists(target):
        return target
    # the file has changed: invalidate everything cached for it
    shutil.rmtree(cache_sub)
    os.makedirs(cache_sub)
    return target
def _urlretrieve(url, ofile, *args, **kwargs):
"""Wrapper for urlretrieve which overwrites."""
try:
return urlretrieve(url, ofile, *args, **kwargs)
except:
if os.path.exists(ofile):
os.remove(ofile)
# try to make the thing more robust with a second shot
try:
return urlretrieve(url, ofile, *args, **kwargs)
except:
if os.path.exists(ofile):
os.remove(ofile)
raise
def download_demo_files():
    """Checks if the demo data is already on the cache and downloads it.
    Borrowed from OGGM.

    The demo data lives in the ``sample_data_gh_repo`` GitHub repository;
    the latest commit sha on master is queried (at most once per hour) and,
    when it differs from the locally stored one, the whole cache is emptied
    and the zip archive is downloaded again.

    Returns
    -------
    dict
        mapping from demo file name to its path inside the cache directory.
    """
    # GitHub API endpoint for the latest master commit, and the zip archive
    master_sha_url = 'https://api.github.com/repos/%s/commits/master' % \
                     sample_data_gh_repo
    master_zip_url = 'https://github.com/%s/archive/master.zip' % \
                     sample_data_gh_repo
    ofile = os.path.join(cache_dir, 'salem-sample-data.zip')
    shafile = os.path.join(cache_dir, 'salem-sample-data-commit.txt')
    odir = os.path.join(cache_dir)

    # a file containing the online's file's hash and the time of last check
    if os.path.exists(shafile):
        with open(shafile, 'r') as sfile:
            local_sha = sfile.read().strip()
        last_mod = os.path.getmtime(shafile)
    else:
        # very first download
        local_sha = '0000'
        last_mod = 0

    # test only every hour
    if time.time() - last_mod > 3600:
        write_sha = True
        try:
            # this might fail with HTTP 403 when server overload
            resp = urlopen(master_sha_url)
            # following try/finally is just for py2/3 compatibility
            # https://mail.python.org/pipermail/python-list/2016-March/704073.html
            try:
                json_str = resp.read().decode('utf-8')
            finally:
                resp.close()
            json_obj = json.loads(json_str)
            master_sha = json_obj['sha']
            # if not same, delete entire dir
            if local_sha != master_sha:
                empty_cache()
        except (HTTPError, URLError):
            # offline or rate-limited: keep whatever is cached locally
            master_sha = 'error'
    else:
        write_sha = False

    # download only if necessary
    if not os.path.exists(ofile):
        print('Downloading salem-sample-data...')
        _urlretrieve(master_zip_url, ofile)

        # Trying to make the download more robust
        try:
            with zipfile.ZipFile(ofile) as zf:
                zf.extractall(odir)
        except zipfile.BadZipfile:
            # try another time
            if os.path.exists(ofile):
                os.remove(ofile)
            _urlretrieve(master_zip_url, ofile)
            with zipfile.ZipFile(ofile) as zf:
                zf.extractall(odir)

    # sha did change, replace
    if write_sha:
        with open(shafile, 'w') as sfile:
            sfile.write(master_sha)

    # list of files for output
    out = dict()
    sdir = os.path.join(cache_dir, 'salem-sample-data-master')
    for root, directories, filenames in os.walk(sdir):
        for filename in filenames:
            out[filename] = os.path.join(root, filename)
    return out
def get_demo_file(fname):
    """Returns the path to the desired demo file.

    The original line carried a stray ``[docs]`` prefix (a Sphinx HTML
    scraping artifact) which made the definition a syntax error.

    Parameters
    ----------
    fname : str
        name of the demo file

    Returns
    -------
    str or None
        path to the file in the cache, or None if ``fname`` is unknown.
    """
    # dict.get returns None for missing keys, same as the old if/else
    return download_demo_files().get(fname)
def get_natural_earth_file(res='lr'):
    """Returns the path to the desired natural earth file.

    http://www.shadedrelief.com/natural3/pages/textures.html

    Parameters
    ----------
    res : str
        'lr', 'mr' or 'hr' (low, medium or high resolution)

    Returns
    -------
    path to the image file

    Raises
    ------
    ValueError
        if ``res`` is not one of 'lr', 'mr', 'hr'.
    """
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    if res == 'lr':
        # the low-res image ships with the demo data
        return get_demo_file('natural_earth_lr.jpg')
    elif res == 'mr':
        urlpath = nearth_base + '8192/textures/2_no_clouds_8k.jpg'
    elif res == 'hr':
        urlpath = nearth_base + '16200/textures/2_no_clouds_16k.jpg'
    else:
        # previously an unknown res fell through and crashed later with an
        # UnboundLocalError on ``urlpath`` -- fail early and clearly instead
        raise ValueError('res not understood: {}'.format(res))

    ofile = os.path.join(download_dir, 'natural_earth_' + res + '.jpg')
    # download only if necessary
    if not os.path.exists(ofile):
        print('Downloading Natural Earth ' + res + '...')
        _urlretrieve(urlpath, ofile)
    return ofile
@memory.cache
def read_colormap(name):
    """Reads a colormap from the custom files in Salem.

    Parameters
    ----------
    name : str
        colormap name; the demo file ``<name>.c3g`` is parsed.

    Returns
    -------
    np.ndarray
        (N, 3) float array of RGB values scaled by 256.
    """
    path = get_demo_file(name + '.c3g')
    out = []
    with open(path, 'r') as file:
        for line in file:
            # only 'rgb(r, g, b)' lines carry color data
            if 'rgb(' not in line:
                continue
            line = line.split('(')[-1].split(')')[0]
            out.append([float(n) for n in line.split(',')])
    # np.float was deprecated in NumPy 1.20 and later removed -- the builtin
    # float (== np.float64 for astype) is the drop-in replacement
    return np.asarray(out).astype(float) / 256.
@memory.cache
def joblib_read_img_url(url):
    """Prevent to re-download from GoogleStaticMap if it was done before"""
    from matplotlib.image import imread
    # fetch the raw bytes and hand them to matplotlib via an in-memory buffer
    handle = urlopen(url, timeout=10)
    raw = handle.read()
    return imread(io.BytesIO(raw))