diff --git a/lgt_convert-runner.py b/lgt_convert-runner.py
index d2cd602a7f2d691e5247caf5a041c9acd9bfd0f8..5277fa2ad30d7944506f84818d4e3360e7b282c9 100755
--- a/lgt_convert-runner.py
+++ b/lgt_convert-runner.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 """Convenience wrapper for running lgt_convert directly from source tree."""
 
-from lpjguesstools.lgt_convert import main
+from lpjguesstools.lgt_convert.lgt_convert import main
 
 if __name__ == '__main__':
     main()
diff --git a/lgt_createinput-runner.py b/lgt_createinput-runner.py
index 2f74ac22f4feeb7fc71376ffdc6f45f3455f8db6..ce1e96c113d1eb90a2ff34f3d9767f9fac63e884 100755
--- a/lgt_createinput-runner.py
+++ b/lgt_createinput-runner.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 """Convenience wrapper for running lgt_createinput directly from source tree."""
 
-from lpjguesstools.lgt_createinput import cli
+from lpjguesstools.lgt_createinput.cli import cli
 
 if __name__ == '__main__':
     cli()
diff --git a/lpjguesstools/__init__.py b/lpjguesstools/__init__.py
index 5890623e8a6f89bc79652af944a899740ea9ff2c..55bc03f7a7a347f77f4cf0826a29add005b006d6 100644
--- a/lpjguesstools/__init__.py
+++ b/lpjguesstools/__init__.py
@@ -63,10 +63,6 @@ hFile.setFormatter(lfFile)
 hFile.setLevel(logging.DEBUG)
 rootLogger.addHandler(hFile)
 
-# constants
-NODATA = -9999
-ENCODING = {'_FillValue': NODATA, 'zlib': True}
-
 EPILOG = """Christian Werner, SENCKENBERG Biodiversity and Climate Research Centre (BiK-F)
 email: christian.werner@senkenberg.de
 2017/09/26"""
diff --git a/lpjguesstools/__main__.py b/lpjguesstools/__main__.py
deleted file mode 100644
index f3e025acb72a20784831f29ef0eb8b2adb1aa2f9..0000000000000000000000000000000000000000
--- a/lpjguesstools/__main__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- coding: utf-8 -*-
-"""lpjguess2nc.__main__: executed when lpjguess2nc directory is called as script."""
-
-from .lpjguess2nc import main
-main()
diff --git a/lpjguesstools/lgt_convert/__init__.py b/lpjguesstools/lgt_convert/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/lpjguesstools/cli.py b/lpjguesstools/lgt_convert/cli.py
similarity index 100%
rename from lpjguesstools/cli.py
rename to lpjguesstools/lgt_convert/cli.py
diff --git a/lpjguesstools/extra.py b/lpjguesstools/lgt_convert/extra.py
similarity index 100%
rename from lpjguesstools/extra.py
rename to lpjguesstools/lgt_convert/extra.py
diff --git a/lpjguesstools/lgt_convert.py b/lpjguesstools/lgt_convert/lgt_convert.py
similarity index 100%
rename from lpjguesstools/lgt_convert.py
rename to lpjguesstools/lgt_convert/lgt_convert.py
diff --git a/lpjguesstools/lgt_createinput/__init__.py b/lpjguesstools/lgt_createinput/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ae153539db7a86384603334de5db5ab0cd43d0b
--- /dev/null
+++ b/lpjguesstools/lgt_createinput/__init__.py
@@ -0,0 +1,3 @@
+# constants
+NODATA = -9999
+ENCODING = {'_FillValue': NODATA, 'zlib': True}
diff --git a/lpjguesstools/_geoprocessing.py b/lpjguesstools/lgt_createinput/_geoprocessing.py
similarity index 74%
rename from lpjguesstools/_geoprocessing.py
rename to lpjguesstools/lgt_createinput/_geoprocessing.py
index be37dd39a9f6c68c8a6d13578c94f3126dca71c4..0db1eabba1d07c2695442de95d862f43f6ad99be 100644
--- a/lpjguesstools/_geoprocessing.py
+++ b/lpjguesstools/lgt_createinput/_geoprocessing.py
@@ -13,6 +13,7 @@ import scipy
 import xarray as xr
 
 from ._tpi import calculate_tpi
+from . import _xr_tile
 
 log = logging.getLogger(__name__)
 
@@ -20,26 +21,25 @@ log = logging.getLogger(__name__)
 from . import NODATA
 from . import ENCODING
 
 
-def assign_boundary_cond(dem):
+def enlarge_array(a):
     """Pad grid boundaries for proper slope calc at adges."""
 
-    # This creates a grid 2 rows and 2 columns larger than the input
-    ny, nx = dem.shape
-    dem_padded = np.zeros((ny + 2, nx + 2))
-    dem_padded[1:-1,1:-1] = dem  # Insert old grid in center
+    ny, nx = a.shape
+    b = np.zeros((ny + 2, nx + 2))
+    b[1:-1,1:-1] = a  # Insert old grid in center
 
     # Assign boundary conditions - sides
-    dem_padded[0, 1:-1] = dem[0, :]
-    dem_padded[-1, 1:-1] = dem[-1, :]
-    dem_padded[1:-1, 0] = dem[:, 0]
-    dem_padded[1:-1, -1] = dem[:,-1]
+    b[0, 1:-1] = a[0, :]
+    b[-1, 1:-1] = a[-1, :]
+    b[1:-1, 0] = a[:, 0]
+    b[1:-1, -1] = a[:,-1]
 
     # Assign boundary conditions - corners
-    dem_padded[0, 0] = dem[0, 0]
-    dem_padded[0, -1] = dem[0, -1]
-    dem_padded[-1, 0] = dem[-1, 0]
-    dem_padded[-1, -1] = dem[-1, 0]
-    return dem_padded
+    b[0, 0] = a[0, 0]
+    b[0, -1] = a[0, -1]
+    b[-1, 0] = a[-1, 0]
+    b[-1, -1] = a[-1, -1]
+    return b
 
 
 def calc_slope_components(dem, dx):
@@ -51,7 +51,7 @@ def calc_slope_components(dem, dx):
     # of the grids in is the same as that out.
 
     # Assign boundary conditions
-    dem_padded = assign_boundary_cond(dem)
+    dem_padded = enlarge_array(dem)
 
     #Compute finite differences
     Sx = (dem_padded[1:-1, 2:] - dem_padded[1:-1, :-2])/(2*dx)
@@ -65,21 +65,6 @@ def calculate_utm_crs(lon, lat):
     return 'EPSG:%d' % code
 
 
-def update_attrs(obj, *args, **kwargs):
-    """Update the attributes in a xarray Dataset or DataArray"""
-    # use: ds.pipe(update_attrs, foo='bar')
-
-    obj.attrs.update(*args, **kwargs)
-    return obj
-
-
-def update_encoding(obj, *args, **kwargs):
-    """Update the encoding in a xarray Dataset or DataArray"""
-    # use: ds.pipe(update_encoding, ENCODING)
-    obj.encoding.update(*args, **kwargs)
-    return obj
-
-
 def apply_mask(a, m):
     """Apply a mask from another masked_array."""
     return np.ma.masked_where(np.ma.getmask(m), a)
@@ -98,31 +83,32 @@ def calc_slope(Sx, Sy):
     return np.rad2deg(np.sqrt(Sx**2 + Sy**2))
 
 
-def create_dem_dataset(dem, dem_mask, slope, aspect, landform, info=None, source=None):
-    """Create a datasets from dem, dem_mask, slope and aspect."""
+def derive_coordinates(info):
+    """Calculate tile lat lon information from GTiff info."""
+    dx, _, leftc, _, dy, upperc, _, _, _ = info['transform']
+    cellsx = info['width']
+    cellsy = info['height']
+    lowerc = upperc - (cellsy*abs(dy))
+    lons = np.linspace(leftc, leftc+((cellsx+1)*dx), cellsx)
+    lats = np.linspace(lowerc, lowerc+((cellsy+1)*abs(dy)), cellsy)
+    # flipped lats
+    return dict(lon=lons, lat=lats[::-1])
+
+
+def create_tile(dem, dem_mask, slope, aspect, landform, info=None, source=None):
+    """Create a tile dataset from dem, dem_mask, slope and aspect."""
 
     # if a rasterio transfrom info is passed
     if info != None:
-        dx, _, leftc, _, dy, upperc, _, _, _ = info['transform']
-        cellsx = info['width']
-        cellsy = info['height']
-        lowerc = upperc - (cellsy*abs(dy))
-        lons = np.linspace(leftc, leftc+((cellsx+1)*dx), cellsx)
-        lats = np.linspace(lowerc, lowerc+((cellsy+1)*abs(dy)), cellsy)
-
-        COORDS = dict(lat=lats[::-1], lon=lons)
+        COORDS = derive_coordinates(info)
         DIMS = ['lat', 'lon']
     else:
-        log.warn('No spatial information provided. y-axis likely flipped.')
+        log.warn('No spatial information provided. Y-axis likely flipped.')
         COORDS={}
         DIMS=['dim_0', 'dim_1']
 
     # default mask
     m = np.ma.masked_where(dem_mask == 0, dem_mask)
-
-    def apply_mask(a, m):
-        """Apply a mask from another masked_array."""
-        return np.ma.masked_where(np.ma.getmask(m), a)
 
     # special encoding (force output as Int16)
     ENCODING_INT = dict(ENCODING)
@@ -141,10 +127,10 @@ def create_dem_dataset(dem, dem_mask, slope, aspect, landform, info=None, source
                             encoding=ENCODING_INT)
 
     # add scale_factor to slope encoding
-    ds['slope'].pipe(update_encoding, {'scale_factor': 0.1})
+    ds['slope'].tile.update_encoding(dict(scale_factor=0.1))
 
     if source != None:
-        set_global_attr(ds, 'source', source)
+        ds.tile.set('source', source)
 
     return ds
@@ -325,27 +311,15 @@ def compute_spatial_dataset(fname_dem, fname_shp=None):
     aspect = np.ma.masked_array(ds_geo2.read(4), mask=~dem_mask)
     landform = np.ma.masked_array(ds_geo2.read(5), mask=~dem_mask)
 
-    #log.info("Dumping GTiff 1arc file for debugging.")
-    #print msrc_kwargs
-    #with rasterio.open('test.tiff', 'w', **msrc_kwargs) as dst:
-    #    for i in range(1,6):
-    #        dst.write(ds_geo2.read(i), i)
-
-    # create dataset
-    ds = create_dem_dataset(dem, dem_mask, slope, aspect, landform,
-                            info=msrc_kwargs, source=source_name_dem)
+
+    # create tile dataset
+    ds = create_tile(dem, dem_mask, slope, aspect, landform,
+                     info=msrc_kwargs, source=source_name_dem)
 
     return ds
 
 
 # xarray-based methods
 
-def get_center_coord(ds):
-    """Return the (lon, lat) of dataset (center)"""
-    lat_c = min(ds.lat.values) + (max(ds.lat.values) - min(ds.lat.values)) * 0.5
-    lon_c = min(ds.lon.values) + (max(ds.lon.values) - min(ds.lon.values)) * 0.5
-    return (lon_c, lat_c)
-
-
 def classify_aspect(ds, TYPE='SIMPLE'):
     """Classify dataarray from continuous aspect to 1,2,3,4. or 1, 2"""
@@ -374,7 +348,7 @@ def classify_aspect(ds, TYPE='SIMPLE'):
     da_asp_cl = xr.full_like(ds['aspect'], np.nan)
     ds['aspect_class'] = da_asp_cl
     ds['aspect_class'][:] = asp_cl
-    ds['aspect_class'].pipe(update_encoding, ENCODING_INT)
+    ds['aspect_class'].tile.update_encoding(ENCODING_INT)
 
     return ds
@@ -391,7 +365,7 @@ def classify_landform(ds, elevation_levels=[], TYPE='SIMPLE'):
         aspect_lf = [2,3,5]
     else:
         log.error('Currently only classifiation schemes WEISS, SIMPLE supported.')
-    set_global_attr(ds, 'lgt.classification', TYPE.lower())
+    ds.tile.set('classification', TYPE.lower())
 
     aspect_lfs = (ds['aspect_class'].to_masked_array() > 0) & \
                  (np.in1d(ds['landform'].to_masked_array(), aspect_lf).reshape(SHAPE))
@@ -404,7 +378,7 @@ def classify_landform(ds, elevation_levels=[], TYPE='SIMPLE'):
     ele = ds['elevation'].to_masked_array()
     if len(elevation_levels) > 0:
         # add global elevation step attribute (second element, first is lower boundary)
-        set_global_attr(ds, 'lgt.elevation_step', "%s" % elevation_levels[1])
+        ds.tile.set('elevation_step', elevation_levels[1])
         for i, (lb, ub) in enumerate(zip(elevation_levels[:-1], elevation_levels[1:])):
             lf_cl = np.ma.where(((ele >= lb) & (ele < ub)), lf_cl + (i+1) * 100, lf_cl)
@@ -417,74 +391,5 @@ def classify_landform(ds, elevation_levels=[], TYPE='SIMPLE'):
     da_lf_cl = xr.full_like(ds['landform'], np.nan)
     ds['landform_class'] = da_lf_cl
     ds['landform_class'][:] = lf_cl
-    ds['landform_class'].pipe(update_encoding, ENCODING_INT)
+    ds['landform_class'].tile.update_encoding(ENCODING_INT)
 
     return ds
-
-
-def is_empty_tile(ds):
-    """Check if this tile has no data (sum(mask)==0)."""
-    if ds['mask'].sum() == 0:
-        return True
-    return False
-
-
-def split_srtm1_dataset(ds):
-    """Split a 1arc SRTM1 dataset into 4 0.5x0.5 tiles."""
-    lats_ix = np.arange(len(ds['lat'].values))
-    lons_ix = np.arange(len(ds['lon'].values))
-    lats = [x.tolist() for x in np.array_split(lats_ix, 2)]
-    lons = [x.tolist() for x in np.array_split(lons_ix, 2)]
-
-    # if we have an uneven length of split arrays (srtm1 data with 3601 px)
-    if len(lats[0]) != len(lats[1]):
-        lats[1] = [lats[0][-1]] + lats[1]
-
-    if len(lons[0]) != len(lons[1]):
-        lons[1] = [lons[0][-1]] + lons[1]
-
-    # split into 4 tiles [0.5x0.5 deg]
-    ds1 = ds[dict(lat=lats[0], lon=lons[0])]
-    ds2 = ds[dict(lat=lats[0], lon=lons[1])]
-    ds3 = ds[dict(lat=lats[1], lon=lons[1])]
-    ds4 = ds[dict(lat=lats[1], lon=lons[0])]
-
-    return_tiles = []
-    for i, ds_ in enumerate([ds1, ds2, ds3, ds4]):
-        if is_empty_tile(ds_):
-            return_tiles.append(None)
-        else:
-            return_tiles.append(ds_)
-
-    return return_tiles
-
-
-def get_global_attr(ds, attr_name):
-    """Get the global dataset attribute."""
-    if type(ds) == str:
-        attr = None
-        with xr.open_dataset(ds) as ds:
-            if ds.attrs.has_key(attr_name):
-                attr = ds.attrs[attr_name]
-        return attr
-    else:
-        if ds.attrs.has_key(attr_name):
-            return ds.attrs[attr_name]
-        else:
-            return None
-
-
-def set_global_attr(ds, attr_name, value, overwrite=False):
-    """Set the global dataset attribute."""
-    if type(ds) == str:
-        with xr.open_dataset(ds) as ds:
-            if ds.attrs.has_key(attr_name) and overwrite==False:
-                log.error("Trying to set attr %s to %s (it already exists)." % (
-                    attr_name, str(value)))
-            else:
-                ds.attrs[attr_name] = value
-    else:
-        if ds.attrs.has_key(attr_name) and overwrite==False:
-            log.error("Trying to set attr %s to %s (it already exists)." % (
-                attr_name, str(value)))
-        else:
-            ds.attrs[attr_name] = value
diff --git a/lpjguesstools/lgt_createinput/_srtm1.py b/lpjguesstools/lgt_createinput/_srtm1.py
new file mode 100644
index 0000000000000000000000000000000000000000..2905758db00c1be094d190effde80c7eb59af96b
--- /dev/null
+++ b/lpjguesstools/lgt_createinput/_srtm1.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+"""lpjguesstools._srtm1: SRTM1 DEM specific routines."""
+
+
+import numpy as np
+
+
+def is_empty_tile(ds):
+    """Check if this tile has no data (sum(mask)==0)."""
+    if ds['mask'].sum() == 0:
+        return True
+    return False
+
+
+def split_srtm1_dataset(ds):
+    """Split a 1arc SRTM1 dataset into 4 0.5x0.5 tiles."""
+    lats_ix = np.arange(len(ds['lat'].values))
+    lons_ix = np.arange(len(ds['lon'].values))
+    lats = [x.tolist() for x in np.array_split(lats_ix, 2)]
+    lons = [x.tolist() for x in np.array_split(lons_ix, 2)]
+
+    # if we have an uneven length of split arrays (srtm1 data with 3601 px)
+    if len(lats[0]) != len(lats[1]):
+        lats[1] = [lats[0][-1]] + lats[1]
+
+    if len(lons[0]) != len(lons[1]):
+        lons[1] = [lons[0][-1]] + lons[1]
+
+    # split into 4 tiles [0.5x0.5 deg]
+    ds1 = ds[dict(lat=lats[0], lon=lons[0])]
+    ds2 = ds[dict(lat=lats[0], lon=lons[1])]
+    ds3 = ds[dict(lat=lats[1], lon=lons[1])]
+    ds4 = ds[dict(lat=lats[1], lon=lons[0])]
+
+    return_tiles = []
+    for i, ds_ in enumerate([ds1, ds2, ds3, ds4]):
+        if is_empty_tile(ds_):
+            return_tiles.append(None)
+        else:
+            return_tiles.append(ds_)
+
+    return return_tiles
diff --git a/lpjguesstools/_tpi.py b/lpjguesstools/lgt_createinput/_tpi.py
similarity index 100%
rename from lpjguesstools/_tpi.py
rename to lpjguesstools/lgt_createinput/_tpi.py
diff --git a/lpjguesstools/lgt_createinput/_xr_geo.py b/lpjguesstools/lgt_createinput/_xr_geo.py
new file mode 100644
index 0000000000000000000000000000000000000000..787154967b880f3d74abd89068a890c642b9ccc8
--- /dev/null
+++ b/lpjguesstools/lgt_createinput/_xr_geo.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""lpjguesstools._xr_geo: Geo extensions of xarray objects."""
+
+# extensions to xarray dataset and dataarray types
+# functions and properties are accessed via the geo namespace:
+
+import logging
+import xarray as xr
+
+log = logging.getLogger(__name__)
+
+
+# geo xarray extensions
+
+def center(self, decimals=2):
+    """Return the geographic center point of this dataset/ dataarray."""
+    if self._center is None:
+        # we can use a cache on our accessor objects, because accessors
+        # themselves are cached on instances that access them.
+        lon = self._obj.lon
+        lat = self._obj.lat
+        self._center = (round(float(lon.mean()), decimals), \
+                        round(float(lat.mean()), decimals))
+    return self._center
+
+def contains(self, coord):
+    """Check if coordinate is within extent of dataset/ dataarray."""
+
+    # allow coord tuple and 2-coord extent
+    if (type(coord) in [list, tuple]) and (len(coord) == 4):
+        coords = [(coord[0], coord[1]), (coord[2], coord[3])]
+    elif (type(coord) in [list, tuple]) and (len(coord) == 2):
+        coords = [coord]
+    else:
+        log.warn('Invalid extent: %s' % str(coord))
+        return False
+
+    lon, lat = self._obj.lon, self._obj.lat
+    checks = []
+    for c in coords:
+        inside = False
+        if (c[0] >= lon.min() and c[0] <= lon.max()):
+            if (c[1] >= lat.min() and c[1] <= lat.max()):
+                inside = True
+        checks.append(inside)
+    if all(checks):
+        return True
+    return False
+
+def clip(self, extent):
+    """Return clipped copy of dataset/ dataarray."""
+    if type(extent) == type(self._obj):
+        lon1, lat1, lon2, lat2 = extent.geo.extent
+    elif self.contains(extent):
+        lon1, lat1, lon2, lat2 = extent
+    else:
+        log.warn("Clip failed.")
+        return None
+    return self._obj.sel(lon=self._obj.lon[(self._obj.lon >= lon1) & (self._obj.lon <= lon2)],
+                         lat=self._obj.lat[(self._obj.lat >= lat1) & (self._obj.lat <= lat2)])
+
+# properties
+@property
+def extent(self):
+    lon, lat = self._obj.lon, self._obj.lat
+    extent = [min(lon.values), min(lat.values), max(lon.values), max(lat.values)]
+    if self._extent is None:
+        self._extent = extent
+    return self._extent
+
+@property
+def is_3d(self):
+    if len(self._obj.dims) == 3:
+        return True
+    return False
+
+
+@xr.register_dataset_accessor('geo')
+class GeoAccessor(object):
+    def __init__(self, xarray_obj):
+        self._obj = xarray_obj
+        self._center = None
+        self._extent = None
+
+    center = center
+    clip = clip
+    contains = contains
+    extent = extent
+    is_3d = is_3d
+
+
+@xr.register_dataarray_accessor('geo')
+class GeoAccessor(object):
+    def __init__(self, xarray_obj):
+        self._obj = xarray_obj
+        self._center = None
+        self._extent = None
+
+    center = center
+    clip = clip
+    contains = contains
+    extent = extent
+    is_3d = is_3d
diff --git a/lpjguesstools/lgt_createinput/_xr_tile.py b/lpjguesstools/lgt_createinput/_xr_tile.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce6eafe6e614e508d86642f47889b810242a5ae4
--- /dev/null
+++ b/lpjguesstools/lgt_createinput/_xr_tile.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+"""lpjguesstools._xr_tile: Tile extensions of xarray objects."""
+
+# extensions to xarray dataset and dataarray types
+# functions and properties are accessed via the tile namespace:
+
+import logging
+import xarray as xr
+
+log = logging.getLogger(__name__)
+
+
+# lgt xarray extensions
+
+@xr.register_dataset_accessor('tile')
+class TileAccessor(object):
+    def __init__(self, xarray_obj):
+        self._obj = xarray_obj
+        self.lgtattrs = set()
+
+    def _get_lgt_attrs(self):
+        # parse any existing lgt attributes
+        lgtattrs = [x.replace('lgt.','') for x in self._obj.attrs.keys() if 'lgt.' in x]
+        self.lgtattrs = set(lgtattrs)
+
+    def set(self, attr_name, attr_value, overwrite=False):
+        """Set a lgt attribute"""
+        aname = 'lgt.' + attr_name
+        if attr_name in self.lgtattrs:
+            if not overwrite:
+                log.warn('LGT attribute "%s" exists but overwrite is False.' % aname)
+                return
+        self.lgtattrs.add(attr_name)
+        self._obj.attrs[aname] = attr_value
+
+    def get(self, attr_name):
+        """Get a lgt attribute"""
+        if len(self.lgtattrs) == 0:
+            self._get_lgt_attrs()
+        if attr_name in self.lgtattrs:
+            return self._obj.attrs['lgt.' + attr_name]
+        else:
+            log.warn('LGT attribute "%s" does not exist in dataset.' % attr_name)
+            return None
+
+    def copy_attrs(self, src, overwrite=False):
+        """Copy lgt attributes from src to dst dataset."""
+        # check src.tile.lgtattrs
+        if len(src.tile.lgtattrs) == 0:
+            src.tile._get_lgt_attrs()
+        for attr_name in src.tile.lgtattrs:
+            self.set(attr_name, src.tile.get(attr_name), overwrite=overwrite)
+
+
+@xr.register_dataarray_accessor('tile')
+class TileAccessor(object):
+    def __init__(self, xarray_obj):
+        self._obj = xarray_obj
+
+    def update_attrs(self, *args, **kwargs):
+        """Update the attributes in a xarray DataArray"""
+        def _update_attrs(obj, *args, **kwargs):
+            obj.attrs.update(*args, **kwargs)
+            return obj
+        self._obj.pipe(_update_attrs, *args, **kwargs)
+
+    def update_encoding(self, *args, **kwargs):
+        """Update the encoding in a xarray DataArray"""
+        def _update_encoding(obj, *args, **kwargs):
+            obj.encoding.update(*args, **kwargs)
+            return obj
+        self._obj.pipe(_update_encoding, *args, **kwargs)
diff --git a/lpjguesstools/lgt_createinput/cli.py b/lpjguesstools/lgt_createinput/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5d00316d0727f32c092781747e7ac21d39aa0e5
--- /dev/null
+++ b/lpjguesstools/lgt_createinput/cli.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""lgt_createinput.cli: Commandline interpreter for lgt_createinput.py."""
+
+import click
+import logging
+
+from .main import main
+
+# import constants
+from .. import EPILOG
+
+
+log = logging.getLogger(__name__)
+
+class Bunch(object):
+    """Simple data storage class."""
+    def __init__(self, adict):
+        self.__dict__.update(adict)
+
+
+# command line arguments
+CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+click.Context.get_usage = click.Context.get_help
+
+@click.command(context_settings=CONTEXT_SETTINGS, epilog=EPILOG)
+
+@click.option('--classfication', type=click.Choice(['SIMPLE', 'WEISS']),
+              default='SIMPLE', show_default=True,
+              help='classification scheme')
+
+@click.option('--cutoff', default=1.0, show_default=True,
+              help='required area fraction [%]')
+
+@click.option('--dems', metavar='PATH',
+              help='source for DEM files')
+
+@click.option('--extent', nargs=4, type=click.FLOAT, metavar='LON1 LAT1 LON2 LAT2',
+              help='extent of output netcdf files')
+
+@click.option('--force-overwrite', is_flag=True, default=False,
+              help='overwrite tiles even if they already exist')
+
+@click.option('--gridlist', default='gridlist.txt',
+              help='name of created gridlist file')
+
+@click.option('--masks', metavar='PATH',
+              help='source for water masks (shp)')
+
+@click.option('--verbose', is_flag=True,
+              help='increase logging info')
+
+@click.version_option()
+
+@click.argument('storage', type=click.Path(exists=True))
+@click.argument('outdir', type=click.Path(exists=True))
+
+def cli(cutoff, dems, masks, gridlist, extent, classfication, storage, outdir, verbose, force_overwrite):
+    """LPJ-GUESS 4.0 subpixel mode input creation tool
+
+    This tool creates site and landform netCDF files and a gridlist file
+    from SRTM1 (or potentially other) elevation data.
+
+    """
+
+    # example:
+    #./lgt_createinput.py processed output --dems=srtm1/*.zip --masks=srtm1_shp_mask --extent -76 -56 -66 -16
+
+    if verbose:
+        logging.getLogger(__name__).setLevel(logging.DEBUG)
+    else:
+        logging.getLogger(__name__).setLevel(logging.INFO)
+
+    if dems is not None:
+        SRTMSTORE_PATH = dems
+
+    if masks is not None:
+        WATERMASKSTORE_PATH = masks
+
+    REGION = None
+    if extent and len(extent) == 4:
+        REGION = list(extent)
+
+
+    # the setup dictionary to convert into a bunch obj
+    config_data=dict(SRTMSTORE_PATH=dems,
+                     WATERMASKSTORE_PATH=masks,
+                     TILESTORE_PATH=storage,
+                     REGION=REGION,
+                     CLASSIFICATION=classfication,
+                     CUTOFF=cutoff,
+                     OUTPUT_PATH=outdir,
+                     GRIDLIST_TXT=gridlist,
+                     OUTDIR=outdir,
+                     OVERWRITE=force_overwrite)
+
+    # TODO: change logging level based on verbose flag
+    cfg = Bunch(config_data)
+
+    main(cfg)
+    
\ No newline at end of file
diff --git a/lpjguesstools/lgt_createinput.py b/lpjguesstools/lgt_createinput/main.py
similarity index 73%
rename from lpjguesstools/lgt_createinput.py
rename to lpjguesstools/lgt_createinput/main.py
index 50c28c5c916125694797bc844ac58698688ddce2..2902fd06ef0018bf78a7b3cdaf87674f56f6f15e 100644
--- a/lpjguesstools/lgt_createinput.py
+++ b/lpjguesstools/lgt_createinput/main.py
@@ -1,4 +1,4 @@
-"""FILE lgt_createinput.py.
+"""FILE lgt_createinput.main.py This script creates condensed LPJ netcdf files for landforms and soil properties @@ -23,9 +23,7 @@ email: christian.werner@senkenberg.de 2017/02/07 """ -import click -from collections import Counter, OrderedDict -import copy +from collections import OrderedDict import datetime import glob import logging @@ -36,18 +34,20 @@ import string import time import xarray as xr -from _geoprocessing import compute_spatial_dataset, classify_aspect, \ - classify_landform, get_center_coord, \ - split_srtm1_dataset, get_global_attr, \ - analyze_filename_dem, set_global_attr, \ - update_attrs, update_encoding +from ._geoprocessing import analyze_filename_dem, \ + classify_aspect, \ + classify_landform, \ + compute_spatial_dataset + +from ._srtm1 import split_srtm1_dataset +import _xr_geo +import _xr_tile log = logging.getLogger(__name__) # import constants from . import NODATA from . import ENCODING -from . import EPILOG # quick helpers # TODO: move to a dedicated file later @@ -62,18 +62,18 @@ def time_dec(func): return wrapper -class Bunch(object): - """Simple data storage class.""" - def __init__(self, adict): - self.__dict__.update(adict) - +varSoil = {'TOTC': ('soc', 'Soil Organic Carbon', 'soc', 'percent', 0.1), + 'SDTO': ('sand', 'Sand', 'sand', 'percent', 1.0), + 'STPC': ('silt', 'Silt', 'silt', 'percent', 1.0), + 'CLPC': ('clay', 'Clay', 'clay', 'percent', 1.0)} -varD = {'TOTC': ('SOC', 'Soil Organic Carbon', 'percent', 0.1), - 'SDTO': ('SAND', 'Sand', 'percent', 1.0), - 'STPC': ('SILT', 'Silt', 'percent', 1.0), - 'CLPC': ('CLAY', 'Clay', 'percent', 1.0)} +varLF = {'lfcnt': ('lfcnt', 'Number of landforms', 'lfcnt', '-', 1.0), + 'slope': ('slope', 'Slope', 'slope', 'deg', 1.0), + 'fraction': ('fraction', 'Landform Fraction', 'fraction', '1/1', 1.0), + 'elevation': ('elevation', 'Elevation', 'elevation', 'm', 1.0)} -soil_vars = sorted(varD.keys()) +soil_vars = sorted(varSoil.keys()) +lf_vars = sorted(varLF.keys()) def convert_float_coord_to_string(coord, p=2): @@ -149,21 +149,6 @@ def define_landform_classes(step, limit, TYPE='SIMPLE'): return (lf_full_set, ele_breaks) -def tile_already_processed(fname, TILESTORE_PATH): - """Check if the tile exists.""" - existing_tiles = glob.glob(os.path.join(TILESTORE_PATH, '*.nc')) - #existing_tiles = [os.path.basename(x) for x in glob.glob(glob_string)] - - for existing_tile in existing_tiles: - source_attr = get_global_attr(existing_tile, 'source') - if source_attr != None: - # TODO: add second check (version?) - _, source_name = analyze_filename_dem(fname) - if source_name == source_attr: - return True - return False - - def tiles_already_processed(TILESTORE_PATH): """Check if the tile exists.""" existing_tiles = glob.glob(os.path.join(TILESTORE_PATH, '*.nc')) @@ -171,10 +156,12 @@ def tiles_already_processed(TILESTORE_PATH): processed_tiles = [] for existing_tile in existing_tiles: - source_attr = get_global_attr(existing_tile, 'source') - if source_attr != None: - _, source_name = analyze_filename_dem(source_attr) - processed_tiles.append(source_name) + with xr.open_dataset(existing_tile) as ds: + source = ds.tile.get('source') + if source is not None: + processed_tiles.append(source) + else: + log.warn('Source attr not set in file %s.' 
 
     return processed_tiles
@@ -231,27 +218,33 @@ def get_tile_summary(ds, cutoff=0):
         lf_elevation = ds['elevation'].values[ix].mean()
         df.loc[i, 'slope'] = lf_slope
         df.loc[i, 'elevation'] = lf_elevation
-
-    #df = df.sort_values(by='cells', ascending=False)
-    df.reset_index(inplace=True)
+
     return df
 
 
 def tile_files_compatible(files):
     """Get global attribute from all tile netcdf files and check
-    they are the same.
+    they were created with an identical elevation step.
    """
-    x = [get_global_attr(x, 'lgt.elevation_step') for x in files]
-    if all(x):
-        if x[0] != None:
-            return True
-        else:
-            return False
+
+    fingerprints = []
+    for file in files:
+        with xr.open_dataset(file) as ds:
+            fingerprint = (ds.tile.get('elevation_step'), ds.tile.get('classification'))
+            fingerprints.append(fingerprint)
+
+    # check if elements are equal
+    if all(x==fingerprints[0] for x in fingerprints):
+        # check if any fingerprint contains a missing (None) attribute
+        if any(None in f for f in fingerprints):
+            return False
+        return True
    return False
 
 
 def create_stats_table(df, var):
     """Create a landform info table for all coords and given var."""
-    
+
     df_ = df[var].unstack(level=-1, fill_value=NODATA)
 
     # rename columns and split coord tuple col to lon and lat col
     df_.columns = ['lf' + str(col) for col in df_.columns]
@@ -273,9 +266,10 @@ def convert_dem_files(cfg, lf_ele_levels):
 
     if cfg.SRTMSTORE_PATH is not None:
         # if glob_string is a directory, add wildcard for globbing
+        glob_string = cfg.SRTMSTORE_PATH
         if os.path.isdir(cfg.SRTMSTORE_PATH):
            glob_string = os.path.join(cfg.SRTMSTORE_PATH, '*')
-        dem_files = sorted(glob.glob(cfg.SRTMSTORE_PATH))
+        dem_files = sorted(glob.glob(glob_string))
 
        existing_tiles = tiles_already_processed(cfg.TILESTORE_PATH)
@@ -316,7 +310,7 @@ def convert_dem_files(cfg, lf_ele_levels):
 
                 # store file in tilestore
                 # get tile center coordinate and name
-                lon, lat = get_center_coord(tile)
+                lon, lat = tile.geo.center()
                 lonlat_string = convert_float_coord_to_string((lon,lat))
                 tile_name = "srtm1_processed_%s.nc" % lonlat_string
                 tile.to_netcdf(os.path.join(cfg.TILESTORE_PATH, tile_name), \
@@ -344,11 +338,11 @@ def compute_statistics(cfg):
         log.debug('Computing statistics for tile %s' % tile)
         with xr.open_dataset(tile) as ds:
             lf_stats = get_tile_summary(ds, cutoff=cfg.CUTOFF)
+            lf_stats.reset_index(inplace=True)
             number_of_ids = len(lf_stats)
-            lon, lat = get_center_coord(ds)
-
-            coords = pd.Series([(round(lon,2),round(lat,2), int(number_of_ids)) for x in range(len(lf_stats))])
-            lf_stats['coord'] = coords
+            lon, lat = ds.geo.center()
+            coord_tuple = (round(lon,2),round(lat,2), int(number_of_ids))
+            lf_stats['coord'] = pd.Series([coord_tuple for _ in range(len(lf_stats))])
             lf_stats.set_index(['coord', 'lf_id'], inplace=True)
             tiles_stats.append( lf_stats )
 
@@ -387,9 +381,14 @@ def assign_to_dataarray(data, df, lf_full_set, refdata=False):
     return data
 
 
-def spatialclip_dataframe(df, extent):
+def spatialclip_df(df, extent):
     """Clip dataframe wit lat lon columns by extent."""
+    if any(e is None for e in extent):
+        log.warn("SpatialClip: extent passed is None.")
     lon1, lat1, lon2, lat2 = extent
+
+    if ('lon' not in df.columns) or ('lat' not in df.columns):
+        log.warn("SpatialClip: lat/lon column missing in df.")
     return df[((df.lon >= lon1) & (df.lon <= lon2)) &
               ((df.lat >= lat1) & (df.lat <= lat2))]
 
@@ -419,35 +418,55 @@ def build_site_netcdf(soilref, elevref, extent=None):
     # identify locations that need filling and use left neighbor
     smask = np.where(ds_soil['TOTC'].to_masked_array().mask, 1, 0)
     emask = np.where(ds_ele['data'].to_masked_array().mask, 1, 0)
-    missing = np.where((smask == 1) & (emask == 0), 1, 0)
+    # no soil data but elevation: gap-fill with neighbors
+    missing = np.where((smask == 1) & (emask == 0), 1, 0)
     ix, jx = np.where(missing == 1)
-    for i, j in zip(ix, jx):
-        for v in soil_vars:
-            ds_soil[v][i, j] = ds_soil[v][i, j-1]
+
+    if len(ix) > 0:
+        log.debug('Cells with elevation but no soil data [BEFORE GF: %d].' % len(ix))
+
+        for i, j in zip(ix, jx):
+            for v in soil_vars:
+                if np.isfinite(ds_soil[v][i, j-1]):
+                    ds_soil[v][i, j] = ds_soil[v][i, j-1].copy(deep=True)
+                elif np.isfinite(ds_soil[v][i, j+1]):
+                    ds_soil[v][i, j] = ds_soil[v][i, j+1].copy(deep=True)
+                else:
+                    log.warn('Neighbour cells have no soil data either.')
+                    # cell keeps its nodata value
+
+        smask2 = np.where(ds_soil['TOTC'].to_masked_array().mask, 1, 0)
+        missing = np.where((smask2 == 1) & (emask == 0), 1, 0)
+        ix, jx = np.where(missing == 1)
+        log.debug('Cells with elevation but no soil data [AFTER GF: %d].' % len(ix))
 
     dsout = xr.Dataset()
 
     # soil vars
     for v in soil_vars:
-        conv = varD[v][-1]
+        conv = varSoil[v][-1]
         da = ds_soil[v].copy(deep=True) * conv
-        da.name = varD[v][0]
+        da.name = varSoil[v][0]
 
-        vattr = {'name': varD[v][0],
-                 'long_name': varD[v][1],
-                 'units': varD[v][2]}
+        vattr = {'name': varSoil[v][0],
+                 'long_name': varSoil[v][1],
+                 'standard_name': varSoil[v][2],
+                 'units': varSoil[v][3],
+                 'coordinates': "lat lon"}
 
-        da.pipe(update_attrs, vattr)
-        da.pipe(update_encoding, ENCODING)
+        da.tile.update_attrs(vattr)
+        da.tile.update_encoding(ENCODING)
+        da[:] = np.ma.masked_where(emask, da.to_masked_array())
         dsout[da.name] = da
 
     # ele var
     da = xr.full_like(da.copy(deep=True), np.nan)
-    da.name = 'ELEVATION'
-    vattr = {'name': 'elevation', 'long_name': 'Elevation', 'units': 'meters'}
-    da.pipe(update_attrs, vattr)
-    da.pipe(update_encoding, ENCODING)
+    da.name = 'elevation'
+    vattr = {'name': 'elevation', 'long_name': 'Elevation',
+             'units': 'meters', 'standard_name': 'elevation'}
+    da.tile.update_attrs(vattr)
+    da.tile.update_encoding(ENCODING)
     da[:] = ds_ele['data'].to_masked_array()
     dsout[da.name] = da
@@ -467,25 +486,20 @@ def build_landform_netcdf(lf_full_set, frac_lf, elev_lf, slope_lf, cfg, elevatio
     _blank = np.empty(SHAPE)
     da_lfcnt = xr.DataArray(_blank.copy()[0,:,:].astype(int), name='lfcnt',
                             coords=COORDS[1:])
-    da_frac = xr.DataArray(_blank.copy(), name='frac', coords=COORDS)
+    da_frac = xr.DataArray(_blank.copy(), name='fraction', coords=COORDS)
     da_slope = xr.DataArray(_blank.copy(), name='slope', coords=COORDS)
     da_elev = xr.DataArray(_blank.copy(), name='elevation', coords=COORDS)
 
     # check that landform coordinates are in refnc
-    lat_min, lat_max = frac_lf.lat.min(), frac_lf.lat.max()
-    lon_min, lon_max = frac_lf.lon.min(), frac_lf.lon.max()
+    df_extent = [frac_lf.lon.min(), frac_lf.lat.min(), frac_lf.lon.max(), frac_lf.lat.max()]
+    log.debug('df_extent: %s' % str(df_extent))
+    log.debug('contains: %s' % str(refnc.geo.contains(df_extent)))
 
-    lats = refnc['lat'].values.tolist()
-    lons = refnc['lon'].values.tolist()
-
-    if ((lat_min < min(lats)) | (lat_max > max(lats)) |
-        (lon_min < min(lons)) | (lon_max > max(lons))):
-        log.warn('DEM tiles not within specified extent. Clipping.')
-
-        # potentially clip dataframes
-        frac_lf = spatialclip_dataframe(frac_lf, [min(lons), min(lats), max(lons), max(lats)])
-        slope_lf = spatialclip_dataframe(slope_lf, [min(lons), min(lats), max(lons), max(lats)])
-        elev_lf = spatialclip_dataframe(elev_lf, [min(lons), min(lats), max(lons), max(lats)])
+    if refnc.geo.contains(df_extent) == False:
+
+        frac_lf = spatialclip_df(frac_lf, refnc.geo.extent)
+        slope_lf = spatialclip_df(slope_lf, refnc.geo.extent)
+        elev_lf = spatialclip_df(elev_lf, refnc.geo.extent)
 
     # dump files
     frac_lf.to_csv(os.path.join(cfg.OUTDIR, 'df_frac.csv'), index=False)
@@ -502,28 +516,46 @@ def build_landform_netcdf(lf_full_set, frac_lf, elev_lf, slope_lf, cfg, elevatio
     dsout[da_frac.name] = da_frac
     dsout[da_slope.name] = da_slope
     dsout[da_elev.name] = da_elev
+
+    for v in dsout.data_vars:
+        vattr = {}
+        if v in lf_vars:
+            vattr = {'name': varLF[v][0],
+                     'long_name': varLF[v][1],
+                     'standard_name': varLF[v][2],
+                     'units': varLF[v][3],
+                     'coordinates': "lat lon"}
+        dsout[v].tile.update_attrs(vattr)
+        dsout[v].tile.update_encoding(ENCODING)
+
+    dsout['lat'].tile.update_attrs(dict(standard_name='latitude',
+                                        long_name='latitude',
+                                        units='degrees_north'))
+
+    dsout['lon'].tile.update_attrs(dict(standard_name='longitude',
+                                        long_name='longitude',
+                                        units='degrees_east'))
+
+    dsout['lf_id'].tile.update_attrs(dict(standard_name='lf_id',
+                                          long_name='lf_id',
+                                          units='-'))
 
     for dv in dsout.data_vars:
-        dsout[dv].pipe(update_encoding, ENCODING)
+        dsout[dv].tile.update_encoding(ENCODING)
 
     # register the specific landform properties (elevation steps, classfication)
-    set_global_attr(dsout, 'lgt.elevation_step', elevation_levels[1])
-    set_global_attr(dsout, 'lgt.classification', cfg.CLASSIFICATION.lower())
+    dsout.tile.set('elevation_step', elevation_levels[1])
+    dsout.tile.set('classification', cfg.CLASSIFICATION.lower())
 
     return dsout
 
 
-def copy_global_lgt_attrs(ds, dsout):
-    """Copy global lgt attributes from source to target dataset."""
-    source_attrs = dict([(k, v) for k, v in ds.attrs.items() if 'lgt.' in k])
-    dsout.attrs.update(source_attrs)
-
 def build_compressed(ds):
     """Build LPJ-Guess 4.0 compatible compressed netcdf file."""
 
     # identify landforms netcdf
     if 'lfcnt' in ds.data_vars:
         v = 'lfcnt'
-    elif 'ELEVATION' in ds.data_vars:
-        v = 'ELEVATION'
+    elif 'elevation' in ds.data_vars:
+        v = 'elevation'
     else:
         log.error("Not a valid xr.Dataset (landforms or site only).")
@@ -556,9 +588,17 @@ def build_compressed(ds):
     lats = xr.DataArray(latL, name='lat', coords=[('land_id', LFIDS)])
     lons = xr.DataArray(lonL, name='lon', coords=[('land_id', LFIDS)])
 
+    lats.tile.update_attrs(dict(standard_name='latitude',
+                                long_name='latitude',
+                                units='degrees_north'))
+
+    lons.tile.update_attrs(dict(standard_name='longitude',
+                                long_name='longitude',
+                                units='degrees_east'))
+
     # create land_id reference array
     # TODO: clip land_id array to Chile country extent?
-    da_ids.pipe(update_encoding, ENCODING)
+    da_ids.tile.update_encoding(ENCODING)
     ds_ids = da_ids.to_dataset(name='land_id')
 
     # create xr.Dataset
@@ -570,11 +610,10 @@ def build_compressed(ds):
     for v in ds.data_vars:
         if is_3d(ds, v):
             _shape = (len(LFIDS), len(ds[ds[v].dims[0]]))
-            COORDS = [('land_id', LFIDS), ('lf_id', ds.coords['lf_id'])]
+            COORDS = [('land_id', LFIDS), ('lf_id', ds['lf_id'])]
         else:
             _shape = (len(LFIDS),)
             COORDS = [('land_id', LFIDS)]
-
         _blank = np.ones( _shape )
         _da = xr.DataArray(_blank[:], name=v, coords=COORDS)
 
@@ -583,12 +622,18 @@ def build_compressed(ds):
             vals = ds[v].sel(lat=lat, lon=lon).to_masked_array()
             _da.loc[land_id] = vals
 
-        _da.pipe(update_attrs, ds[v].attrs)
-        _da.pipe(update_encoding, ENCODING)
+        _da.tile.update_attrs(ds[v].attrs)
+        _da.tile.update_encoding(ENCODING)
         dsout[_da.name] = _da
 
-    copy_global_lgt_attrs(ds, dsout)
+    if is_3d(ds, v):
+        dsout['lf_id'].tile.update_attrs(dict(standard_name='lf_id',
+                                              long_name='lf_id',
+                                              units='-'))
+
+    # copy lgt attributes from src to dst
+    dsout.tile.copy_attrs(ds)
 
     return (ds_ids, dsout)
@@ -625,8 +670,8 @@ def main(cfg):
 
     # default soil and elevation data (contained in package)
     import pkg_resources
-    SOIL_NC = pkg_resources.resource_filename(__name__, 'data/GLOBAL_WISESOIL_DOM_05deg.nc')
-    ELEVATION_NC = pkg_resources.resource_filename(__name__, 'data/GLOBAL_ELEVATION_05deg.nc')
+    SOIL_NC = pkg_resources.resource_filename(__name__, '../data/GLOBAL_WISESOIL_DOM_05deg.nc')
+    ELEVATION_NC = pkg_resources.resource_filename(__name__, '../data/GLOBAL_ELEVATION_05deg.nc')
 
     log.info("Converting DEM files and computing landform stats")
@@ -648,7 +693,7 @@ def main(cfg):
                                       lf_ele_levels, refnc=sitenc)
 
     # clip to joined mask
-    elev_mask = np.where(sitenc['ELEVATION'].values == NODATA, 0, 1)
+    elev_mask = np.where(sitenc['elevation'].values == NODATA, 0, 1)
     landform_mask = np.where(landformnc['lfcnt'].values == NODATA, 0, 1)
     valid_mask = elev_mask * landform_mask
@@ -682,83 +727,3 @@ def main(cfg):
 
     log.info("Done")
 
-
-# command line arguments
-CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
-click.Context.get_usage = click.Context.get_help
-
-@click.command(context_settings=CONTEXT_SETTINGS, epilog=EPILOG)
-
-@click.option('--classfication', type=click.Choice(['SIMPLE', 'WEISS']),
-              default='SIMPLE', show_default=True,
-              help='classification scheme')
-
-@click.option('--cutoff', default=1.0, show_default=True,
-              help='required area fraction [%]')
-
-@click.option('--dems', metavar='PATH',
-              help='source for DEM files')
-
-@click.option('--extent', nargs=4, type=click.FLOAT, metavar='LON1 LAT1 LON2 LAT2',
-              help='extent of output netcdf files')
-
-@click.option('--force-overwrite', is_flag=True, default=False,
-              help='overwrite tiles even if they already exists')
-
-@click.option('--gridlist', default='gridlist.txt',
-              help='name of created gridlist file')
-
-@click.option('--masks', metavar='PATH',
-              help='source for water masks (shp)')
-
-@click.option('--verbose', is_flag=True,
-              help='increase logging info')
-
-@click.version_option()
-
-@click.argument('storage', type=click.Path(exists=True))
-@click.argument('outdir', type=click.Path(exists=True))
-
-def cli(cutoff, dems, masks, gridlist, extent, classfication, storage, outdir, verbose, force_overwrite):
-    """LPJ-GUESS 4.0 subpixel mode input creation tool
-
-    This tools creates site and landform netCDF files and a gridlist file
-    from SRTM1 (or potentially other) elevation data.
- - """ - - # example: - #./lgt_createinput.py processed output --dems=srtm1/*.zip --masks=srtm1_shp_mask --extent -76 -56 -66 -16 - - if verbose: - logging.getLogger(__name__).setLevel(logging.DEBUG) - else: - logging.getLogger(__name__).setLevel(logging.INFO) - - if dems is not None: - SRTMSTORE_PATH = dems - - if masks is not None: - WATERMASKSTORE_PATH = masks - - REGION = None - if len(extent) == 4: - REGION = list(extent) - - - # the setup dictionary to convert into a bunch obj - config_data=dict(SRTMSTORE_PATH=dems, - WATERMASKSTORE_PATH=masks, - TILESTORE_PATH=storage, - REGION=REGION, - CLASSIFICATION=classfication, - CUTOFF=cutoff, - OUTPUT_PATH=outdir, - GRIDLIST_TXT=gridlist, - OUTDIR=outdir, - OVERWRITE=force_overwrite) - - # TODO: change logging level based on verbose flag - cfg = Bunch(config_data) - - main(cfg) - \ No newline at end of file diff --git a/setup.py b/setup.py index 201844eeb7e0b6647c1ce29d215ed8da198ef83c..1488445ef6e1b7b1f5623985ec83f2f1bb8332c5 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ from os import path import re version = re.search('^__version__\s*=\s*"(.*)"', - open('lpjguesstools/lgt_convert.py').read(), re.M).group(1) + open('lpjguesstools/lgt_convert/lgt_convert.py').read(), re.M).group(1) here = path.abspath(path.dirname(__file__)) @@ -87,8 +87,8 @@ setup(name='lpjguesstools', 'data/GLOBAL_ELEVATION_05deg.nc']}, include_package_data=True, entry_points={'console_scripts': [ - 'lgt_convert=lpjguesstools.lgt_convert:main', - 'lgt_createinput=lpjguesstools.lgt_createinput:cli']}, + 'lgt_convert=lpjguesstools.lgt_convert.lgt_convert:main', + 'lgt_createinput=lpjguesstools.lgt_createinput.cli:cli']}, test_suite='lpjguess2nc.test.test_lpjguess2nc', cmdclass={'test': PyTest, 'sdist': PyPack}, dependency_links=dependency_links)