Source code for solaris.tile.raster_tile

import os
import rasterio
from rasterio.warp import Resampling, calculate_default_transform
from rasterio.vrt import WarpedVRT
from rasterio.mask import mask as rasterio_mask
# from rio_cogeo.cogeo import cog_validate, cog_translate
from ..utils.core import _check_crs, _check_rasterio_im_load
# removing the following until COG functionality is implemented
# from ..utils.tile import read_cog_tile
from ..utils.geo import reproject, split_geom, raster_get_projection_unit
import numpy as np
from shapely.geometry import box
from tqdm.auto import tqdm

[docs]class RasterTiler(object): """An object to tile geospatial image strips into smaller pieces. Arguments --------- dest_dir : str, optional Path to save output files to. If not specified here, this must be provided when ``Tiler.tile_generator()`` is called. src_tile_size : `tuple` of `int`s, optional The size of the input tiles in ``(y, x)`` coordinates. By default, this is in pixel units; this can be changed to metric units using the `use_src_metric_size` argument. use_src_metric_size : bool, optional Is `src_tile_size` in pixel units (default) or metric? To set to metric use ``use_src_metric_size=True``. dest_tile_size : `tuple` of `int`s, optional The size of the output tiles in ``(y, x)`` coordinates in pixel units. dest_crs : int, optional The EPSG code or rasterio.crs.CRS object for the CRS that output tiles are in. If not provided, tiles use the crs of `src` by default. Cannot be specified along with project_to_meters. project_to_meters : bool, optional Specifies whether to project to the correct utm zone for the location. Cannot be specified along with `dest_crs`. nodata : int, optional The value in `src` that specifies nodata. If this value is not provided, solaris will attempt to infer the nodata value from the `src` metadata. alpha : int, optional The band to specify as alpha. If not provided, solaris will attempt to infer if an alpha band is present from the `src` metadata. force_load_cog : bool, optional If `src` is a cloud-optimized geotiff, use this argument to force loading in the entire image at once. aoi_boundary : :class:`shapely.geometry.Polygon` or `list`-like [left, bottom, right, top] Defines the bounds of the AOI in which tiles will be created. If a tile will extend beyond the boundary, the "extra" pixels will have the value `nodata`. Can be provided at initialization of the :class:`Tiler` instance or when the input is loaded. If not provided either upon initialization or when an image is loaded, the image bounds will be used; if provided, this value will override image metadata. tile_bounds : `list`-like A `list`-like of ``[left, bottom, right, top]`` lists of coordinates defining the boundaries of the tiles to create. If not provided, they will be generated from the `aoi_boundary` based on `src_tile_size`. verbose : bool, optional Verbose text output. By default, verbose text is not printed. Attributes ---------- src : :class:`rasterio.io.DatasetReader` The source dataset to tile. src_path : `str` The path or URL to the source dataset. Used for calling ``rio_cogeo.cogeo.cog_validate()``. dest_dir : `str` The directory to save the output tiles to. If not dest_crs : int The EPSG code for the output images. If not provided, outputs will keep the same CRS as the source image when ``Tiler.make_tile_images()`` is called. tile_size: tuple A ``(y, x)`` :class:`tuple` storing the dimensions of the output. These are in pixel units unless ``size_in_meters=True``. size_in_meters : bool If ``True``, the units of `tile_size` are in meters instead of pixels. is_cog : bool Indicates whether or not the image being tiled is a Cloud-Optimized GeoTIFF (COG). Determined by checking COG validity using `rio-cogeo <https://github.com/cogeotiff/rio-cogeo>`_. nodata : `int` The value for nodata in the outputs. Will be set to zero in outputs if ``None``. alpha : `int` The band index corresponding to an alpha channel (if one exists). ``None`` if there is no alpha channel. tile_bounds : list A :class:`list` containing ``[left, bottom, right, top]`` bounds sublists for each tile created. resampling : str The resampling method for any resizing. Possible values are ``['bilinear', 'cubic', 'nearest', 'lanczos', 'average']`` (or any other option from :class:`rasterio.warp.Resampling`). aoi_boundary : :class:`shapely.geometry.Polygon` A :class:`shapely.geometry.Polygon` defining the bounds of the AOI that tiles will be created for. If a tile will extend beyond the boundary, the "extra" pixels will have the value `nodata`. Can be provided at initialization of the :class:`Tiler` instance or when the input is loaded. """ def __init__(self, dest_dir=None, dest_crs=None, project_to_meters=False, channel_idxs=None, src_tile_size=(900, 900), use_src_metric_size=False, dest_tile_size=None, dest_metric_size=False, aoi_boundary=None, nodata=None, alpha=None, force_load_cog=False, resampling=None, tile_bounds=None, verbose=False): # set up attributes if verbose: print("Initializing Tiler...") self.dest_dir = dest_dir if not os.path.exists(self.dest_dir): os.makedirs(self.dest_dir) if dest_crs is not None: self.dest_crs = _check_crs(dest_crs) else: self.dest_crs = None self.src_tile_size = src_tile_size self.use_src_metric_size = use_src_metric_size if dest_tile_size is None: self.dest_tile_size = src_tile_size else: self.dest_tile_size = dest_tile_size self.resampling = resampling self.force_load_cog = force_load_cog self.nodata = nodata self.alpha = alpha self.aoi_boundary = aoi_boundary self.tile_bounds = tile_bounds self.project_to_meters = project_to_meters self.tile_paths = [] # retains the paths of the last call to .tile() # self.cog_output = cog_output self.verbose = verbose if self.verbose: print('Tiler initialized.') print('dest_dir: {}'.format(self.dest_dir)) if dest_crs is not None: print('dest_crs: {}'.format(self.dest_crs)) else: print('dest_crs will be inferred from source data.') print('src_tile_size: {}'.format(self.src_tile_size)) print('tile size units metric: {}'.format(self.use_src_metric_size)) if self.resampling is not None: print('Resampling is set to {}'.format(self.resampling)) else: print('Resampling is set to None')
[docs] def tile(self, src, dest_dir=None, channel_idxs=None, nodata=None, alpha=None, restrict_to_aoi=False, dest_fname_base=None, nodata_threshold = None): """An object to tile geospatial image strips into smaller pieces. Arguments --------- src : :class:`rasterio.io.DatasetReader` or str The source dataset to tile. nodata_threshold : float, optional Nodata percentages greater than this threshold will not be saved as tiles. restrict_to_aoi : bool, optional Requires aoi_boundary. Sets all pixel values outside the aoi_boundary to the nodata value of the src image. """ src = _check_rasterio_im_load(src) restricted_im_path = os.path.join(self.dest_dir, "aoi_restricted_"+ os.path.basename(src.name)) self.src_name = src.name # preserves original src name in case restrict is used if restrict_to_aoi is True: if self.aoi_boundary is None: raise ValueError("aoi_boundary must be specified when RasterTiler is called.") mask_geometry = self.aoi_boundary.intersection(box(*src.bounds)) # prevents enlarging raster to size of aoi_boundary index_lst = list(np.arange(1,src.meta['count']+1)) # no need to use transform t since we don't crop. cropping messes up transform of tiled outputs arr, t = rasterio_mask(src, [mask_geometry], all_touched=False, invert=False, nodata=src.meta['nodata'], filled=True, crop=False, pad=False, pad_width=0.5, indexes=list(index_lst)) with rasterio.open(restricted_im_path, 'w', **src.profile) as dest: dest.write(arr) dest.close() src.close() src = _check_rasterio_im_load(restricted_im_path) #if restrict_to_aoi, we overwrite the src to be the masked raster tile_gen = self.tile_generator(src, dest_dir, channel_idxs, nodata, alpha, self.aoi_boundary, restrict_to_aoi) if self.verbose: print('Beginning tiling...') self.tile_paths = [] if nodata_threshold is not None: if nodata_threshold > 1: raise ValueError("nodata_threshold should be expressed as a float less than 1.") print("nodata value threshold supplied, filtering based on this percentage.") new_tile_bounds = [] for tile_data, mask, profile, tb in tqdm(tile_gen): nodata_count = np.logical_or.reduce((tile_data == profile['nodata']), axis=0).sum() nodata_perc = nodata_count / (tile_data.shape[1] * tile_data.shape[2]) if nodata_perc < nodata_threshold: dest_path = self.save_tile( tile_data, mask, profile, dest_fname_base) self.tile_paths.append(dest_path) new_tile_bounds.append(tb) else: print("{} of nodata is over the nodata_threshold, tile not saved.".format(nodata_perc)) self.tile_bounds = new_tile_bounds # only keep the tile bounds that make it past the nodata threshold else: for tile_data, mask, profile, tb in tqdm(tile_gen): dest_path = self.save_tile( tile_data, mask, profile, dest_fname_base) self.tile_paths.append(dest_path) if self.verbose: print('Tiling complete. Cleaning up...') self.src.close() if os.path.exists(os.path.join(self.dest_dir, 'tmp.tif')): os.remove(os.path.join(self.dest_dir, 'tmp.tif')) if os.path.exists(restricted_im_path): os.remove(restricted_im_path) if self.verbose: print("Done. CRS returned for vector tiling.") return _check_crs(profile['crs']) # returns the crs to be used for vector tiling
[docs] def tile_generator(self, src, dest_dir=None, channel_idxs=None, nodata=None, alpha=None, aoi_boundary=None, restrict_to_aoi=False): """Create the tiled output imagery from input tiles. Uses the arguments provided at initialization to generate output tiles. First, tile locations are generated based on `Tiler.tile_size` and `Tiler.size_in_meters` given the bounds of the input image. Arguments --------- src : `str` or :class:`Rasterio.DatasetReader` The source data to tile from. If this is a "classic" (non-cloud-optimized) GeoTIFF, the whole image will be loaded in; if it's cloud-optimized, only the required portions will be loaded during tiling unless ``force_load_cog=True`` was specified upon initialization. dest_dir : str, optional The path to the destination directory to output images to. If the path doesn't exist, it will be created. This argument is required if it wasn't provided during initialization. channel_idxs : list, optional The list of channel indices to be included in the output array. If not provided, all channels will be included. *Note:* per ``rasterio`` convention, indexing starts at ``1``, not ``0``. nodata : int, optional The value in `src` that specifies nodata. If this value is not provided, solaris will attempt to infer the nodata value from the `src` metadata. alpha : int, optional The band to specify as alpha. If not provided, solaris will attempt to infer if an alpha band is present from the `src` metadata. aoi_boundary : `list`-like or :class:`shapely.geometry.Polygon`, optional AOI bounds can be provided either as a ``[left, bottom, right, top]`` :class:`list`-like or as a :class:`shapely.geometry.Polygon`. restrict_to_aoi : bool, optional Should output tiles be restricted to the limits of the AOI? If ``True``, any tile that partially extends beyond the limits of the AOI will not be returned. This is the inverse of the ``boundless`` argument for :class:`rasterio.io.DatasetReader` 's ``.read()`` method. Yields ------ tile_data, mask, tile_bounds tile_data : :class:`numpy.ndarray` A list of lists of each tile's bounds in the order they were created, to be used in tiling vector data. These data are also stored as an attribute of the :class:`Tiler` instance named `tile_bounds`. """ # parse arguments if self.verbose: print("Checking input data...") # if isinstance(src, str): # self.is_cog = cog_validate(src) # else: # self.is_cog = cog_validate(src.name) # if self.verbose: # print('COG: {}'.format(self.is_cog)) self.src = _check_rasterio_im_load(src) if channel_idxs is None: # if not provided, include them all channel_idxs = list(range(1, self.src.count + 1)) print(channel_idxs) self.src_crs = _check_crs(self.src.crs, return_rasterio=True) # necessary to use rasterio crs for reproject if self.verbose: print('Source CRS: EPSG:{}'.format(self.src_crs.to_epsg())) if self.dest_crs is None: self.dest_crs = self.src_crs if self.verbose: print('Destination CRS: EPSG:{}'.format(self.dest_crs.to_epsg())) self.src_path = self.src.name self.proj_unit = raster_get_projection_unit(self.src) # for rounding if self.verbose: print("Inputs OK.") if self.use_src_metric_size: if self.verbose: print("Checking if inputs are in metric units...") if self.project_to_meters: if self.verbose: print("Input CRS is not metric. " "Reprojecting the input to UTM.") self.src = reproject(self.src, resampling_method=self.resampling, dest_path=os.path.join(self.dest_dir, 'tmp.tif')) if self.verbose: print('Done reprojecting.') if nodata is None and self.nodata is None: self.nodata = self.src.nodata elif nodata is not None: self.nodata = nodata # get index of alpha channel if alpha is None and self.alpha is None: mf_list = [rasterio.enums.MaskFlags.alpha in i for i in self.src.mask_flag_enums] # list with True at idx of alpha c try: self.alpha = np.where(mf_list)[0] + 1 except IndexError: # if there isn't a True self.alpha = None else: self.alpha = alpha if getattr(self, 'tile_bounds', None) is None: self.get_tile_bounds() for tb in self.tile_bounds: # removing the following line until COG functionality implemented if True: # not self.is_cog or self.force_load_cog: window = rasterio.windows.from_bounds( *tb, transform=self.src.transform, width=self.src_tile_size[1], height=self.src_tile_size[0]) print('reading data from window') print(self.nodata) if self.src.count != 1: src_data = self.src.read( window=window, indexes=channel_idxs, boundless=True, fill_value=self.nodata) else: src_data = self.src.read( window=window, boundless=True, fill_value=self.nodata) dst_transform, width, height = calculate_default_transform( self.src.crs, self.dest_crs, self.src.width, self.src.height, *tb, dst_height=self.dest_tile_size[0], dst_width=self.dest_tile_size[1]) if self.dest_crs != self.src_crs and self.resampling_method is not None: tile_data = np.zeros(shape=(src_data.shape[0], height, width), dtype=src_data.dtype) rasterio.warp.reproject( source=src_data, destination=tile_data, src_transform=self.src.window_transform(window), src_crs=self.src.crs, dst_transform=dst_transform, dst_crs=self.dest_crs, dst_nodata=self.nodata, resampling=getattr(Resampling, self.resampling)) elif self.dest_crs != self.src_crs and self.resampling_method is None: print("Warning: You've set resampling to None but your " "destination projection differs from the source " "projection. Using bilinear resampling by default.") tile_data = np.zeros(shape=(src_data.shape[0], height, width), dtype=src_data.dtype) tile_data = np.zeros(shape=(src_data.shape[0], height, width), dtype=src_data.dtype) rasterio.warp.reproject( source=src_data, destination=tile_data, src_transform=self.src.window_transform(window), src_crs=self.src.crs, dst_transform=dst_transform, dst_crs=self.dest_crs, dst_nodata=self.nodata, resampling=getattr(Resampling, "bilinear")) else: # for the case where there is no resampling and no dest_crs specified, no need to reproject or resample tile_data = src_data if self.nodata: mask = np.all(tile_data != nodata, axis=0).astype(np.uint8) * 255 elif self.alpha: mask = self.src.read(self.alpha, window=window) else: mask = None # placeholder # else: # tile_data, mask, window, aff_xform = read_cog_tile( # src=self.src, # bounds=tb, # tile_size=self.dest_tile_size, # indexes=channel_idxs, # nodata=self.nodata, # resampling_method=self.resampling # ) profile = self.src.profile profile.update(width=self.dest_tile_size[1], height=self.dest_tile_size[0], crs=self.dest_crs, transform=dst_transform) if len(tile_data.shape) == 2: # if there's no channel band profile.update(count=1) else: profile.update(count=tile_data.shape[0]) yield tile_data, mask, profile, tb
[docs] def save_tile(self, tile_data, mask, profile, dest_fname_base=None): """Save a tile created by ``Tiler.tile_generator()``.""" if dest_fname_base is None: dest_fname_root = os.path.splitext( os.path.split(self.src_path)[1])[0] else: dest_fname_root = dest_fname_base if self.proj_unit not in ['meter', 'metre']: dest_fname = '{}_{}_{}.tif'.format( dest_fname_root, np.round(profile['transform'][2], 3), np.round(profile['transform'][5], 3)) else: dest_fname = '{}_{}_{}.tif'.format( dest_fname_root, int(profile['transform'][2]), int(profile['transform'][5])) # if self.cog_output: # dest_path = os.path.join(self.dest_dir, 'tmp.tif') # else: dest_path = os.path.join(self.dest_dir, dest_fname) with rasterio.open(dest_path, 'w', **profile) as dest: if profile['count'] == 1: dest.write(tile_data[0, :, :], 1) else: for band in range(1, profile['count'] + 1): # base-1 vs. base-0 indexing...bleh dest.write(tile_data[band-1, :, :], band) if self.alpha: # write the mask if there's an alpha band dest.write(mask, profile['count'] + 1) dest.close() return dest_path
# if self.cog_output: # self._create_cog(os.path.join(self.dest_dir, 'tmp.tif'), # os.path.join(self.dest_dir, dest_fname)) # os.remove(os.path.join(self.dest_dir, 'tmp.tif'))
[docs] def fill_all_nodata(self, nodata_fill): """ Fills all tile nodata values with a fill value. The standard workflow is to run this function only after generating label masks and using the original output from the raster tiler to filter out label pixels that overlap nodata pixels in a tile. For example, solaris.vector.mask.instance_mask will filter out nodata pixels from a label mask if a reference_im is provided, and after this step nodata pixels may be filled by calling this method. nodata_fill : int, float, or str, optional Default is to not fill any nodata values. Otherwise, pixels outside of the aoi_boundary and pixels inside the aoi_boundary with the nodata value will be filled. "mean" will fill pixels with the channel-wise mean. Providing an int or float will fill pixels in all channels with the provided value. Returns: list The fill values, in case the mean of the src image should be used for normalization later. """ src = _check_rasterio_im_load(self.src_name) if nodata_fill == "mean": arr = src.read() arr_nan = np.where(arr!=src.nodata, arr, np.nan) fill_values = np.nanmean(arr_nan, axis=tuple(range(1, arr_nan.ndim))) print('Fill values set to {}'.format(fill_values)) elif isinstance(nodata_fill, (float, int)): fill_values = src.meta['count'] * [nodata_fill] print('Fill values set to {}'.format(fill_values)) else: raise TypeError('nodata_fill must be "mean", int, or float. {} was supplied.'.format(nodata_fill)) src.close() for tile_path in self.tile_paths: tile_src = rasterio.open(tile_path, "r+") tile_data = tile_src.read() for i in np.arange(tile_data.shape[0]): tile_data[i,...][tile_data[i,...] == tile_src.nodata] = fill_values[i] # set fill value for each band if tile_src.meta['count'] == 1: tile_src.write(tile_data[0, :, :], 1) else: for band in range(1, tile_src.meta['count'] + 1): # base-1 vs. base-0 indexing...bleh tile_src.write(tile_data[band-1, :, :], band) tile_src.close() return fill_values
def _create_cog(self, src_path, dest_path): """Overwrite non-cloud-optimized GeoTIFF with a COG.""" cog_translate(src_path=src_path, dst_path=dest_path, dst_kwargs={'crs': self.dest_crs}, resampling=self.resampling, latitude_adjustment=False)
[docs] def get_tile_bounds(self): """Get tile bounds for each tile to be created in the input CRS.""" if not self.aoi_boundary: if not self.src: raise ValueError('aoi_boundary and/or a source file must be ' 'provided.') else: # set to the bounds of the image # split_geom can take a list self.aoi_boundary = list(self.src.bounds) self.tile_bounds = split_geom(geometry=self.aoi_boundary, tile_size=self.src_tile_size, resolution=( self.src.transform[0], -self.src.transform[4]), use_projection_units=self.use_src_metric_size, src_img=self.src)
[docs] def load_src_vrt(self): """Load a source dataset's VRT into the destination CRS.""" vrt_params = dict(crs=self.dest_crs, resampling=getattr(Resampling, self.resampling), src_nodata=self.nodata, dst_nodata=self.nodata) return WarpedVRT(self.src, **vrt_params)