Skip to content
Commits on Source (5)
Changes
=======
1.0.23 (2019-05-15)
-------------------
- Break rio-calc output into chunks of user-specified size to constrain the
amount of memory used (#1668).
- Attempts to set attributes of datasets opened in "r" mode now raise a custom
DatasetAttributeError. This exception derives from both RasterioError and
NotImplementedError, which maintains backwards compatibility (#1676).
- Block sizes are no longer guarded when creating untiled datasets (#1689).
- CRS objects are now hashable and equivalent CRS objects have the same hash
value (#1684).
- Allow AWS regions to be specified no matter the signing of requests (#1670).
- Add links to API documentation from the Python quickstart guide.
- Use "CRS.from_epsg({})" instead of "CRS.from_dict(init='epsg:{}')" as the
representation for CRS objects that are completely described by an EPSG code.
- Use GDAL's string parsing to get metadata item keys and values, which
accommodates uncommon "KEY:VALUE" forms.
1.0.22 (2019-03-20)
-------------------
......@@ -11,7 +29,8 @@ Changes
1.0.21 (2019-02-28)
-------------------
- Fix for bug in implementation of the pickle protocol (#1643).
- Fix for bug in implementation of the pickle protocol and added support for
Python's copy protocol (#1643).
1.0.20 (2019-02-27)
-------------------
......
......@@ -16,7 +16,7 @@ provides a Python API based on N-D arrays.
Rasterio supports Python 2.7 and 3.3-3.6 on Linux and Mac OS X.
Read the documentation for more details: https://rasterio.readthedocs.io/en/latest/.
Read the documentation for more details: https://rasterio.readthedocs.io/.
Example
=======
......
rasterio (1.0.23-1) unstable; urgency=medium
* Team upload.
* New upstream release.
* Add patch to fix clean target.
-- Bas Couwenberg <sebastic@debian.org> Thu, 16 May 2019 06:15:05 +0200
rasterio (1.0.22-1~exp1) unstable; urgency=medium
* Team upload.
......
Description: Fix clean target.
GDAL/gdal-config may not be available outside the build chroot.
Author: Bas Couwenberg <sebastic@debian.org>
Forwarded: https://github.com/mapbox/rasterio/pull/1695
--- a/setup.py
+++ b/setup.py
@@ -81,6 +81,9 @@ extra_link_args = []
gdal2plus = False
gdal_output = [None] * 4
gdalversion = None
+gdal_major_version = 0
+gdal_minor_version = 0
+sdist_fill = []
try:
import numpy as np
@@ -88,63 +91,64 @@ try:
except ImportError:
sys.exit("ERROR: Numpy and its headers are required to run setup().")
-try:
- gdal_config = os.environ.get('GDAL_CONFIG', 'gdal-config')
- for i, flag in enumerate(("--cflags", "--libs", "--datadir", "--version")):
- gdal_output[i] = check_output([gdal_config, flag]).strip()
-
- for item in gdal_output[0].split():
- if item.startswith("-I"):
- include_dirs.extend(item[2:].split(":"))
- for item in gdal_output[1].split():
- if item.startswith("-L"):
- library_dirs.extend(item[2:].split(":"))
- elif item.startswith("-l"):
- libraries.append(item[2:])
+if "clean" not in sys.argv:
+ try:
+ gdal_config = os.environ.get('GDAL_CONFIG', 'gdal-config')
+ for i, flag in enumerate(("--cflags", "--libs", "--datadir", "--version")):
+ gdal_output[i] = check_output([gdal_config, flag]).strip()
+
+ for item in gdal_output[0].split():
+ if item.startswith("-I"):
+ include_dirs.extend(item[2:].split(":"))
+ for item in gdal_output[1].split():
+ if item.startswith("-L"):
+ library_dirs.extend(item[2:].split(":"))
+ elif item.startswith("-l"):
+ libraries.append(item[2:])
+ else:
+ # e.g. -framework GDAL
+ extra_link_args.append(item)
+ # datadir, gdal_output[2] handled below
+
+ gdalversion = gdal_output[3]
+ if gdalversion:
+ log.info("GDAL API version obtained from gdal-config: %s",
+ gdalversion)
+
+ except Exception as e:
+ if os.name == "nt":
+ log.info("Building on Windows requires extra options to setup.py "
+ "to locate needed GDAL files. More information is available "
+ "in the README.")
else:
- # e.g. -framework GDAL
- extra_link_args.append(item)
- # datadir, gdal_output[2] handled below
-
- gdalversion = gdal_output[3]
- if gdalversion:
- log.info("GDAL API version obtained from gdal-config: %s",
- gdalversion)
+ log.warning("Failed to get options via gdal-config: %s", str(e))
-except Exception as e:
- if os.name == "nt":
- log.info("Building on Windows requires extra options to setup.py "
- "to locate needed GDAL files. More information is available "
- "in the README.")
- else:
- log.warning("Failed to get options via gdal-config: %s", str(e))
+ # Get GDAL API version from environment variable.
+ if 'GDAL_VERSION' in os.environ:
+ gdalversion = os.environ['GDAL_VERSION']
+ log.info("GDAL API version obtained from environment: %s", gdalversion)
+
+ # Get GDAL API version from the command line if specified there.
+ if '--gdalversion' in sys.argv:
+ index = sys.argv.index('--gdalversion')
+ sys.argv.pop(index)
+ gdalversion = sys.argv.pop(index)
+ log.info("GDAL API version obtained from command line option: %s",
+ gdalversion)
-# Get GDAL API version from environment variable.
-if 'GDAL_VERSION' in os.environ:
- gdalversion = os.environ['GDAL_VERSION']
- log.info("GDAL API version obtained from environment: %s", gdalversion)
-
-# Get GDAL API version from the command line if specified there.
-if '--gdalversion' in sys.argv:
- index = sys.argv.index('--gdalversion')
- sys.argv.pop(index)
- gdalversion = sys.argv.pop(index)
- log.info("GDAL API version obtained from command line option: %s",
- gdalversion)
-
-if not gdalversion:
- sys.exit("ERROR: A GDAL API version must be specified. Provide a path "
- "to gdal-config using a GDAL_CONFIG environment variable "
- "or use a GDAL_VERSION environment variable.")
-
-gdal_version_parts = gdalversion.split('.')
-gdal_major_version = int(gdal_version_parts[0])
-gdal_minor_version = int(gdal_version_parts[1])
-
-if gdal_major_version == 1 and gdal_minor_version < 11:
- sys.exit("ERROR: GDAL >= 1.11 is required for rasterio. "
- "Please upgrade GDAL.")
+ if not gdalversion:
+ sys.exit("ERROR: A GDAL API version must be specified. Provide a path "
+ "to gdal-config using a GDAL_CONFIG environment variable "
+ "or use a GDAL_VERSION environment variable.")
+
+ gdal_version_parts = gdalversion.split('.')
+ gdal_major_version = int(gdal_version_parts[0])
+ gdal_minor_version = int(gdal_version_parts[1])
+
+ if gdal_major_version == 1 and gdal_minor_version < 11:
+ sys.exit("ERROR: GDAL >= 1.11 is required for rasterio. "
+ "Please upgrade GDAL.")
# Conditionally copy the GDAL data. To be used in conjunction with
# the bdist_wheel command to make self-contained binary wheels.
@@ -240,13 +244,14 @@ if os.environ.get('CYTHON_COVERAGE'):
log.debug('ext_options:\n%s', pprint.pformat(ext_options))
-if gdal_major_version >= 2:
- # GDAL>=2.0 does not require vendorized rasterfill.cpp
- cython_fill = ['rasterio/_fill.pyx']
- sdist_fill = ['rasterio/_fill.cpp']
-else:
- cython_fill = ['rasterio/_fill.pyx', 'rasterio/rasterfill.cpp']
- sdist_fill = ['rasterio/_fill.cpp', 'rasterio/rasterfill.cpp']
+if "clean" not in sys.argv:
+ if gdal_major_version >= 2:
+ # GDAL>=2.0 does not require vendorized rasterfill.cpp
+ cython_fill = ['rasterio/_fill.pyx']
+ sdist_fill = ['rasterio/_fill.cpp']
+ else:
+ cython_fill = ['rasterio/_fill.pyx', 'rasterio/rasterfill.cpp']
+ sdist_fill = ['rasterio/_fill.cpp', 'rasterio/rasterfill.cpp']
# When building from a repo, Cython is required.
0001-Rename-rio-to-rasterio-Closes-788463.patch
clean-target.patch
Python Quickstart
=================
Reading and writing data files is a spatial data analyst's bread and butter.
Reading and writing data files is a spatial data programmer's bread and butter.
This document explains how to use Rasterio to read existing files and to create
new files. Some advanced topics are glossed over to be covered in more detail
elsewhere in Rasterio's documentation. Only the GeoTIFF format is used here,
......@@ -11,7 +11,7 @@ Rasterio has been `installed <./installation>`__.
Opening a dataset in reading mode
---------------------------------
Consider an "example.tif" file with 16-bit Landsat 8 imagery covering a part
Consider a GeoTIFF file named :file:`example.tif` with 16-bit Landsat 8 imagery covering a part
of the United States's Colorado Plateau [#]_. Because the imagery is large (70
MB) and has a wide dynamic range it is difficult to display it in a browser.
A rescaled and dynamically squashed version is shown below.
......@@ -30,7 +30,7 @@ Next, open the file.
>>> dataset = rasterio.open('example.tif')
Rasterio's ``rasterio.open()`` takes a path and returns a dataset object. The
Rasterio's :func:`~rasterio.open` function takes a path string or path-like object and returns an opened dataset object. The
path may point to a file of any supported raster format. Rasterio will open it
using the proper GDAL format driver. Dataset objects have some of the same
attributes as Python file objects.
......@@ -47,8 +47,8 @@ attributes as Python file objects.
Dataset attributes
------------------
Properties of the raster data stored in "example.tif" can be accessed through
attributes of the ``dataset`` object. Dataset objects have bands and this
Properties of the raster data stored in the example GeoTIFF can be accessed through
attributes of the opened dataset object. Dataset objects have bands and this
example has a band count of 1.
.. code-block:: pycon
......@@ -56,14 +56,13 @@ example has a band count of 1.
>>> dataset.count
1
A dataset band is an array of values representing the partial distribution of
a single variable in 2-dimensional (2D) space. All band arrays of a dataset
have the same number of rows and columns. The variable represented by the
example dataset's sole band is Level-1 digital numbers (DN) for the Landsat
8 Operational Land Imager (OLI) band 4 (wavelengths between 640-670
nanometers). These values can be scaled to radiance or reflectance values. The
array of DN values has 7731 columns (its ``width``) and
7871 rows (its ``height``).
A dataset band is an array of values representing the partial distribution of a
single variable in 2-dimensional (2D) space. All band arrays of a dataset have
the same number of rows and columns. The variable represented by the example
dataset's sole band is Level-1 digital numbers (DN) for the Landsat 8
Operational Land Imager (OLI) band 4 (wavelengths between 640-670 nanometers).
These values can be scaled to radiance or reflectance values. The array of DN
values is 7731 columns wide and 7871 rows high.
.. code-block:: pycon
......@@ -74,15 +73,16 @@ array of DN values has 7731 columns (its ``width``) and
Some dataset attributes expose the properties of all dataset bands via a tuple
of values, one per band. To get a mapping of band indexes to variable data
types, apply a dictionary comprehension to the ``zip()`` product of a dataset's
``indexes`` and ``dtypes`` attributes.
types, apply a dictionary comprehension to the :func:`zip` product of a
dataset's :attr:`~rasterio.io.DatasetReader.indexes` and
:attr:`~rasterio.io.DatasetReader.dtypes` attributes.
.. code-block:: pycon
>>> {i: dtype for i, dtype in zip(dataset.indexes, dataset.dtypes)}
{1: 'uint16'}
The "example.tif" file's sole band contains unsigned 16-bit integer values. The
The example file's sole band contains unsigned 16-bit integer values. The
GeoTIFF format also supports signed integers and floats of different size.
Dataset georeferencing
......@@ -102,8 +102,8 @@ Our example covers the world from
meters to 4265115 meters bottom to top. It covers a region 231.93 kilometers
wide by 236.13 kilometers high.
The ``bounds`` attribute is derived from a more fundamental attribute: the
dataset's geospatial ``transform``.
The value of :attr:`~rasterio.io.DatasetReader.bounds` attribute is derived
from a more fundamental attribute: the dataset's geospatial transform.
.. code-block:: pycon
......@@ -111,10 +111,11 @@ dataset's geospatial ``transform``.
Affine(30.0, 0.0, 358485.0,
0.0, -30.0, 4265115.0)
The ``transform`` attribute is an affine transformation matrix that maps pixel
locations in (row, col) coordinates to (x, y) spatial positions. The product of
this matrix and ``(0, 0)``, the row and column coordinates of the upper left
corner of the dataset, is the spatial position of the upper left corner.
A dataset's :attr:`~rasterio.io.DatasetReader.transform` is an affine
transformation matrix that maps pixel locations in (row, col) coordinates to
(x, y) spatial positions. The product of this matrix and ``(0, 0)``, the row
and column coordinates of the upper left corner of the dataset, is the spatial
position of the upper left corner.
.. code-block:: pycon
......@@ -135,25 +136,24 @@ values are relative to the origin of the dataset's coordinate reference system
.. code-block:: pycon
>>> dataset.crs
CRS({'init': 'epsg:32612'})
CRS.from_epsg(32612)
"epsg:32612" identifies a particular coordinate reference system: `UTM
"EPSG 32612" identifies a particular coordinate reference system: `UTM
<https://en.wikipedia.org/wiki/Universal_Transverse_Mercator_coordinate_system>`__
zone 12N. This system is used for mapping areas in the Northern Hemisphere
between 108 and 114 degrees west. The upper left corner of the example dataset,
``(358485.0, 4265115.0)``, is 141.5 kilometers west of zone 12's central
meridian (111 degrees west) and 4265 kilometers north of the equator.
Coordinate reference systems are an advanced topic. Suffice it to say that
between the ``crs`` attribute and the ``transform`` attribute a raster dataset
is geo-referenced and can be compared to other GIS datasets.
Between the :attr:`~rasterio.io.DatasetReader.crs` attribute and ``transform``
the georeferencing of a raster dataset is described and the dataset can be
compared to other GIS datasets.
Reading raster data
-------------------
The raster array for a raster band can be accessed by calling
``dataset.read()`` with the band's index number. Following the GDAL convention,
bands are indexed from 1.
Data from a raster band can be accessed by the band's index number. Following
the GDAL convention, bands are indexed from 1.
.. code-block:: pycon
......@@ -161,7 +161,7 @@ bands are indexed from 1.
(1,)
>>> band1 = dataset.read(1)
The ``read()`` method returns a Numpy N-D array.
The :meth:`~rasterio.io.DatasetReader.read` method returns a Numpy N-D array.
.. code-block:: pycon
......@@ -174,7 +174,7 @@ The ``read()`` method returns a Numpy N-D array.
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=uint16)
Values from the array can be had by their row, column index.
Values from the array can be addressed by their row, column index.
.. code-block:: pycon
......@@ -184,7 +184,8 @@ Values from the array can be had by their row, column index.
Spatial indexing
----------------
Datasets have a method of getting array indices for spatial points. To get the
Datasets have an :meth:`~rasterio.io.DatasetReader.index` method for getting
the array indices corresponding to points in georeferenced space. To get the
value for the pixel 100 kilometers east and 50 kilometers south of the
dataset's upper left corner, do the following.
......@@ -197,7 +198,7 @@ dataset's upper left corner, do the following.
>>> band_one[row, col]
7566
To get the spatial coordinates of a pixel, use the dataset's ``xy()`` method.
To get the spatial coordinates of a pixel, use the dataset's :meth:`~rasterio.io.DatasetReader.xy` method.
The coordinates of the center of the image can be computed like this.
.. code-block:: pycon
......@@ -240,26 +241,26 @@ Opening a dataset in writing mode
---------------------------------
To save this array along with georeferencing information to a new raster data
file, call ``rasterio.open()`` with a path to the new file to be created,
file, call :func:`rasterio.open` with a path to the new file to be created,
``'w'`` to specify writing mode, and several keyword arguments.
* ``driver``: the name of the desired format driver
* ``width``: the number of columns of the dataset
* ``height``: the number of rows of the dataset
* ``count``: a count of the dataset bands
* ``dtype``: the data type of the dataset
* ``crs``: a coordinate reference system identifier or description
* ``transform``: an affine transformation matrix, and
* ``nodata``: a "nodata" value
* *driver*: the name of the desired format driver
* *width*: the number of columns of the dataset
* *height*: the number of rows of the dataset
* *count*: a count of the dataset bands
* *dtype*: the data type of the dataset
* *crs*: a coordinate reference system identifier or description
* *transform*: an affine transformation matrix, and
* *nodata*: a "nodata" value
The first 5 of these keyword arguments parametrize fixed, format-specific
properties of the data file and are required when opening a file to
write. The last 3 are optional.
In this example the coordinate reference system will be "+proj=latlong", which
In this example the coordinate reference system will be ``'+proj=latlong'``, which
describes an equirectangular coordinate reference system with units of decimal
degrees. The right affine transformation matrix can be computed using
a function in the ``rasterio.transform`` module.
:func:`~rasterio.transform.from_origin`.
.. code-block:: pycon
......@@ -278,26 +279,35 @@ A dataset for storing the example grid is opened like so
.. code-block:: pycon
>>> new_dataset = rasterio.open('/tmp/new.tif', 'w', driver='GTiff',
... height=Z.shape[0], width=Z.shape[1],
... count=1, dtype=Z.dtype,
... crs='+proj=latlong', transform=transform)
Values for the `height`, `width`, and `dtype` keyword arguments are taken
>>> new_dataset = rasterio.open(
... '/tmp/new.tif',
... 'w',
... driver='GTiff',
... height=Z.shape[0],
... width=Z.shape[1],
... count=1,
... dtype=Z.dtype,
... crs='+proj=latlong',
... transform=transform,
... )
Values for the *height*, *width*, and *dtype* keyword arguments are taken
directly from attributes of the 2-D array, ``Z``. Not all raster formats can
support the 64-bit float values in ``Z``, but the GeoTIFF format can.
Saving raster data
------------------
To save the grid, call the new dataset's ``write()`` method with the grid and
target band number as arguments.
To copy the grid to the opened dataset, call the new dataset's
:meth:`~rasterio.io.DatasetWriter.write` method with the grid and target band
number as arguments.
.. code-block:: pycon
>>> new_dataset.write(Z, 1)
and then call the ``close()`` method to sync data to disk and finish.
Then call the :meth:`~rasterio.io.DatasetWriter.close` method to sync data to
disk and finish.
.. code-block:: pycon
......@@ -308,9 +318,17 @@ Python's context manager protocol, it is possible to do the following instead.
.. code-block:: python
with rasterio.open('/tmp/new.tif', 'w', driver='GTiff', height=Z.shape[0],
width=Z.shape[1], count=1, dtype=Z.dtype,
crs='+proj=latlong', transform=transform) as dst:
with rasterio.open(
'/tmp/new.tif',
'w',
driver='GTiff',
height=Z.shape[0],
width=Z.shape[1],
count=1,
dtype=Z.dtype,
crs='+proj=latlong',
transform=transform,
) as dst:
dst.write(Z, 1)
These are the basics of reading and writing raster data files. More features
......
......@@ -3,108 +3,66 @@ Resampling
For details on changing coordinate reference systems, see `Reprojection`.
Up and Downsampling
Up and downsampling
-------------------
*Resampling* refers to changing the cell values due to changes in the raster cell grid. This can occur during reprojection. Even if the crs is not changing, we may want to change the effective cell size of an existing dataset.
*Resampling* refers to changing the cell values due to changes in the raster
cell grid. This can occur during reprojection. Even if the projection is not
changing, we may want to change the effective cell size of an existing dataset.
*Upsampling* refers to cases where we are converting to higher resolution/smaller cells.
*Downsampling* is resampling to lower resolution/larger cellsizes.
*Upsampling* refers to cases where we are converting to higher
resolution/smaller cells. *Downsampling* is resampling to lower
resolution/larger cellsizes.
There are three potential ways to perform up/downsampling.
By reading from a raster source into an output array of a different size or by
specifying an *out_shape* of a different size you are effectively resampling
the data.
Use reproject
~~~~~~~~~~~~~
~
If you use ``reproject`` but keep the same CRS, you can utilize the underlying GDAL algorithms
to resample your data.
This involves coordinating the size of your output array with the
cell size in its associated affine transform. In other words, if you *multiply* the resolution
by ``x``, you need to *divide* the affine parameters defining the cell size by ``x``
Here is an example of upsampling by a factor of 2 using the bilinear resampling
method.
.. code-block:: python
arr = src.read()
newarr = np.empty(shape=(arr.shape[0], # same number of bands
round(arr.shape[1] * 1.5), # 150% resolution
round(arr.shape[2] * 1.5)))
# adjust the new affine transform to the 150% smaller cell size
aff = src.transform
newaff = Affine(aff.a / 1.5, aff.b, aff.c,
aff.d, aff.e / 1.5, aff.f)
reproject(
arr, newarr,
src_transform = aff,
dst_transform = newaff,
src_crs = src.crs,
dst_crs = src.crs,
resampling = Resampling.bilinear)
import rasterio
from rasterio.enums import Resampling
Use scipy
~~~~~~~~~
with rasterio.open("example.tif") as dataset:
data = dataset.read(
out_shape=(dataset.height * 2, dataset.width * 2, dataset.count),
resampling=Resampling.bilinear
)
You can also use `scipy.ndimage.interpolation.zoom`_ to "zoom" with a configurable spline interpolation
that differs from the resampling methods available in GDAL. This may not be appropriate for all data so check the results carefully. You must adjust the affine transform just as we did above.
Here is an example of downsampling by a factor of 2 using the average resampling
method.
.. code-block:: python
from scipy.ndimage.interpolation import zoom
# Increase resolution, decrease cell size by 150%
# Note we only zoom on axis 1 and 2
# axis 0 (our band axis) stays fixed
arr = src.read()
newarr = zoom(arr, zoom=[1, 1.5, 1.5], order=3, prefilter=False)
# Adjust original affine transform
aff = src.transform
newaff = Affine(aff.a / 1.5, aff.b, aff.c,
aff.d, aff.e / 1.5, aff.f)
Use decimated reads
~~~~~~~~~~~~~~~~~~~
Another technique for quickly up/downsampling data is to use decimated reads.
By reading from a raster source into an ``out`` array of a specified size, you
are effectively resampling the data to a new size.
.. warning::
The underlying GDALRasterIO function does not support different resampling
methods. You are stuck with the default which can result in unwanted effects
and data loss in some cases. We recommend using a different method unless
you are upsampling by an integer factor.
As per the previous two examples, you must also adjust the affine accordingly.
Note that this method is only recommended for *increasing* resolution by an integer factor.
.. code-block:: python
with rasterio.open("example.tif") as dataset:
data = dataset.read(
out_shape=(dataset.height / 2, dataset.width / 2, dataset.count),
resampling=Resampling.average
)
newarr = np.empty(shape=(arr.shape[0], # same number of bands
round(arr.shape[1] * 2), # double resolution
round(arr.shape[2] * 2)))
.. note::
arr.read(out=newarr) # newarr is changed in-place
After these resolution changing operations, the dataset's resolution and the
resolution components of its affine *transform* property no longer apply to
the new arrays.
Resampling Methods
------------------
When you change the raster cell grid, you must recalculate the pixel values. There is no "correct" way to do this as all methods involve some interpolation.
When you change the raster cell grid, you must recalculate the pixel values.
There is no "correct" way to do this as all methods involve some interpolation.
The current resampling methods can be found in the `rasterio.enums`_ source.
Of note, the default ``Resampling.nearest`` method may not be suitable for continuous data. In those
cases, ``Resampling.bilinear`` and ``Resampling.cubic`` are better suited.
Some specialized statistical resampling methods exist, e.g. ``Resampling.average``, which may be
useful when certain numerical properties of the data are to be retained.
Of note, the default ``Resampling.nearest`` method may not be suitable for
continuous data. In those cases, ``Resampling.bilinear`` and
``Resampling.cubic`` are better suited. Some specialized statistical
resampling methods exist, e.g. ``Resampling.average``, which may be useful when
certain numerical properties of the data are to be retained.
.. _scipy.ndimage.interpolation.zoom: http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.ndimage.interpolation.zoom.html
.. _rasterio.enums: https://github.com/mapbox/rasterio/blob/master/rasterio/enums.py#L28
......@@ -16,6 +16,8 @@ in pixels. These may be ints or floats.
.. code-block:: python
from rasterio.windows import Window
Window(col_off, row_off, width, height)
Windows may also be constructed from numpy array index tuples or slice objects.
......
name: _rasterio
channels:
- conda-forge
- defaults
dependencies:
- python=3.6.6
- libgdal=2.3.2
- python=3.6.*
- libgdal=2.3.*
......@@ -42,7 +42,7 @@ import rasterio.path
__all__ = ['band', 'open', 'pad', 'Env']
__version__ = "1.0.22"
__version__ = "1.0.23"
__gdal_version__ = gdal_version()
# Rasterio attaches NullHandler to the 'rasterio' logger and its
......
......@@ -26,6 +26,7 @@ from rasterio.enums import (
ColorInterp, Compression, Interleaving, MaskFlags, PhotometricInterp)
from rasterio.env import Env, env_ctx_if_needed
from rasterio.errors import (
DatasetAttributeError,
RasterioIOError, CRSError, DriverRegistrationError, NotGeoreferencedWarning,
RasterBlockError, BandOverviewError)
from rasterio.profiles import Profile
......@@ -450,7 +451,7 @@ cdef class DatasetBase(object):
return self.get_nodatavals()
def _set_nodatavals(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
property nodata:
"""The dataset's single nodata value
......@@ -513,7 +514,7 @@ cdef class DatasetBase(object):
for x in self._mask_flags())
def _set_crs(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
property crs:
"""The dataset's coordinate reference system
......@@ -533,16 +534,16 @@ cdef class DatasetBase(object):
self._set_crs(value)
def _set_all_descriptions(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
def _set_all_scales(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
def _set_all_offsets(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
def _set_all_units(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
property descriptions:
"""Descriptions for each dataset band
......@@ -563,7 +564,7 @@ cdef class DatasetBase(object):
self._set_all_descriptions(value)
def write_transform(self, value):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
property transform:
"""The dataset's georeferencing transformation matrix
......@@ -948,6 +949,8 @@ cdef class DatasetBase(object):
cdef GDALMajorObjectH obj = NULL
cdef char **metadata = NULL
cdef const char *domain = NULL
cdef char *key = NULL
cdef char *val = NULL
if bidx > 0:
obj = self.band(bidx)
......@@ -959,7 +962,14 @@ cdef class DatasetBase(object):
metadata = GDALGetMetadata(obj, domain)
num_items = CSLCount(metadata)
return dict(metadata[i].split('=', 1) for i in range(num_items))
tag_items = []
for i in range(num_items):
val = CPLParseNameValue(metadata[i], &key)
tag_items.append((key[:], val[:]))
CPLFree(key)
return dict(tag_items)
def get_tag_item(self, ns, dm=None, bidx=0, ovr=None):
......@@ -1175,7 +1185,7 @@ cdef class DatasetBase(object):
for i in range(num_gcps)], crs)
def _set_gcps(self, values):
raise NotImplementedError
raise DatasetAttributeError("read-only attribute")
property gcps:
"""ground control points and their coordinate reference system.
......
......@@ -10,6 +10,7 @@ option is set to a new value inside the thread.
include "gdal.pxi"
from contextlib import contextmanager
import logging
import os
import os.path
......@@ -255,6 +256,16 @@ class GDALDataFinder(object):
return datadir if os.path.exists(os.path.join(datadir, 'pcs.csv')) else None
@contextmanager
def catch_errors():
"""Intercept GDAL errors"""
try:
CPLPushErrorHandler(CPLQuietErrorHandler)
yield None
finally:
CPLPopErrorHandler()
class PROJDataFinder(object):
"""Finds PROJ data files
......@@ -272,6 +283,7 @@ class PROJDataFinder(object):
cdef OGRSpatialReferenceH osr = OSRNewSpatialReference(NULL)
try:
with catch_errors():
exc_wrap_ogrerr(exc_wrap_int(OSRImportFromProj4(osr, "+init=epsg:4326")))
except CPLE_BaseError:
return False
......
......@@ -7,6 +7,7 @@ include "directives.pxi"
include "gdal.pxi"
from collections import Counter
from contextlib import contextmanager
import logging
import sys
import uuid
......@@ -67,6 +68,7 @@ def _delete_dataset_if_exists(path):
path = path.encode('utf-8')
c_path = path
with catch_errors():
with nogil:
h_dataset = GDALOpenShared(c_path, <GDALAccess>0)
try:
......@@ -779,6 +781,16 @@ cdef class DatasetReaderBase(DatasetBase):
return sample_gen(self, xy, indexes)
@contextmanager
def catch_errors():
"""Intercept GDAL errors"""
try:
CPLPushErrorHandler(CPLQuietErrorHandler)
yield None
finally:
CPLPopErrorHandler()
cdef class MemoryFileBase(object):
"""Base for a BytesIO-like class backed by an in-memory file."""
......@@ -1063,6 +1075,10 @@ cdef class DatasetWriterBase(DatasetReaderBase):
fname = name_b
# Process dataset opening options.
# "tiled" affects the meaning of blocksize, so we need it
# before iterating.
tiled = bool(kwargs.get('tiled', False))
for k, v in kwargs.items():
# Skip items that are definitely *not* valid driver
# options.
......@@ -1072,9 +1088,9 @@ cdef class DatasetWriterBase(DatasetReaderBase):
k, v = k.upper(), str(v).upper()
# Guard against block size that exceed image size.
if k == 'BLOCKXSIZE' and int(v) > width:
if k == 'BLOCKXSIZE' and tiled and int(v) > width:
raise ValueError("blockxsize exceeds raster width.")
if k == 'BLOCKYSIZE' and int(v) > height:
if k == 'BLOCKYSIZE' and tiled and int(v) > height:
raise ValueError("blockysize exceeds raster height.")
key_b = k.encode('utf-8')
......
......@@ -101,6 +101,9 @@ class CRS(collections.Mapping):
def __copy__(self):
return pickle.loads(pickle.dumps(self))
def __hash__(self):
return hash(self.to_wkt())
def to_proj4(self):
"""Convert CRS to a PROJ4 string
......@@ -275,7 +278,7 @@ class CRS(collections.Mapping):
def __repr__(self):
epsg_code = self.to_epsg()
if epsg_code:
return "CRS.from_dict(init='epsg:{}')".format(epsg_code)
return "CRS.from_epsg({})".format(epsg_code)
else:
return "CRS.from_wkt('{}')".format(self.wkt)
......
......@@ -102,3 +102,7 @@ class UnsupportedOperation(RasterioError):
class OverviewCreationError(RasterioError):
"""Raised when creation of an overview fails"""
class DatasetAttributeError(RasterioError, NotImplementedError):
"""Raised when dataset attributes are misused"""
......@@ -22,6 +22,8 @@ cdef extern from "cpl_error.h" nogil:
CE_Failure
CE_Fatal
ctypedef int CPLErrorNum
# CPLErrorNum eludes me at the moment, I'm calling it 'int'
# for now.
ctypedef void (*CPLErrorHandler)(CPLErr, int, const char*)
......@@ -32,6 +34,7 @@ cdef extern from "cpl_error.h" nogil:
CPLErr CPLGetLastErrorType()
void CPLPushErrorHandler(CPLErrorHandler handler)
void CPLPopErrorHandler()
void CPLQuietErrorHandler(CPLErr eErrClass, CPLErrorNum nError, const char *pszErrorMsg)
cdef extern from "cpl_string.h" nogil:
......@@ -47,6 +50,7 @@ cdef extern from "cpl_string.h" nogil:
char **CSLSetNameValue(char **list, char *name, char *val)
void CSLDestroy(char **list)
char **CSLMerge(char **first, char **second)
const char* CPLParseNameValue(const char *pszNameValue, char **ppszKey)
cdef extern from "cpl_vsi.h" nogil:
......
"""$ rio calc"""
from __future__ import division
from collections import OrderedDict
from distutils.version import LooseVersion
import math
import click
import snuggs
......@@ -9,19 +12,20 @@ import snuggs
import rasterio
from rasterio.features import sieve
from rasterio.fill import fillnodata
from rasterio.windows import Window
from rasterio.rio import options
from rasterio.rio.helpers import resolve_inout
def get_bands(inputs, d, i=None):
def _get_bands(inputs, sources, d, i=None):
"""Get a rasterio.Band object from calc's inputs"""
path = inputs[d] if d in dict(inputs) else inputs[int(d) - 1][1]
src = rasterio.open(path)
idx = d if d in dict(inputs) else int(d) - 1
src = sources[idx]
return (rasterio.band(src, i) if i else
[rasterio.band(src, j) for j in src.indexes])
def read_array(ix, subix=None, dtype=None):
def _read_array(ix, subix=None, dtype=None):
"""Change the type of a read array"""
arr = snuggs._ctx.lookup(ix, subix)
if dtype:
......@@ -29,6 +33,49 @@ def read_array(ix, subix=None, dtype=None):
return arr
def _chunk_output(width, height, count, itemsize, mem_limit=1):
    """Divide the calculation output into chunks.

    This function determines the chunk size such that an array of shape
    (chunk_size, chunk_size, count) with itemsize bytes per element
    requires no more than mem_limit megabytes of memory.

    Output chunks are described by rasterio Windows.

    Parameters
    ----------
    width : int
        Output width.
    height : int
        Output height.
    count : int
        Number of output bands.
    itemsize : int
        Number of bytes per pixel.
    mem_limit : int, default: 1
        The maximum size in memory of a chunk array, in megabytes.

    Returns
    -------
    list of ((row, col), Window) tuples
        Chunk grid indexes paired with the Window describing each chunk.
    """
    # A chunk holds chunk_size * chunk_size * count pixels of itemsize
    # bytes each, so the per-band pixel budget divides by BOTH itemsize
    # and count. Note the parentheses: the earlier form
    # "mem_limit * 1.0e+6 / itemsize * count" multiplied by count and
    # overshot the memory limit by a factor of count squared.
    max_pixels = mem_limit * 1.0e+6 / (itemsize * count)
    # Guard against a zero chunk size when itemsize * count is huge
    # relative to mem_limit.
    chunk_size = max(1, int(math.floor(math.sqrt(max_pixels))))
    # True division is in effect (from __future__ import division).
    ncols = int(math.ceil(width / chunk_size))
    nrows = int(math.ceil(height / chunk_size))
    chunk_windows = []

    for col in range(ncols):
        col_offset = col * chunk_size
        w = min(chunk_size, width - col_offset)
        for row in range(nrows):
            row_offset = row * chunk_size
            h = min(chunk_size, height - row_offset)
            chunk_windows.append(
                ((row, col), Window(col_offset, row_offset, w, h)))

    return chunk_windows
@click.command(short_help="Raster data calculator.")
@click.argument('command')
@options.files_inout_arg
......@@ -40,10 +87,10 @@ def read_array(ix, subix=None, dtype=None):
@options.dtype_opt
@options.masked_opt
@options.overwrite_opt
@click.option("--mem-limit", type=int, default=64, help="Limit on memory used to perform calculations, in MB.")
@options.creation_options
@click.pass_context
def calc(ctx, command, files, output, name, dtype, masked, overwrite,
creation_options):
def calc(ctx, command, files, output, name, dtype, masked, overwrite, mem_limit, creation_options):
"""A raster data calculator
Evaluates an expression using input datasets and writes the result
......@@ -56,13 +103,13 @@ def calc(ctx, command, files, output, name, dtype, masked, overwrite,
\b
* (read i) evaluates to the i-th input dataset (a 3-D array).
* (read i j) evaluates to the j-th band of the i-th dataset (a 2-D
array).
* (take foo j) evaluates to the j-th band of a dataset named foo (see
help on the --name option above).
* (read i j) evaluates to the j-th band of the i-th dataset (a
2-D array).
* (take foo j) evaluates to the j-th band of a dataset named foo
(see help on the --name option above).
* Standard numpy array operators (+, -, *, /) are available.
* When the final result is a list of arrays, a multi band output
file is written.
* When the final result is a list of arrays, a multiple band
output file is written.
* When the final result is a single array, a single band output
file is written.
......@@ -72,15 +119,18 @@ def calc(ctx, command, files, output, name, dtype, masked, overwrite,
$ rio calc "(+ 2 (* 0.95 (read 1)))" tests/data/RGB.byte.tif \\
> /tmp/out.tif
Produces a 3-band GeoTIFF with all values scaled by 0.95 and
incremented by 2.
The command above produces a 3-band GeoTIFF with all values scaled
by 0.95 and incremented by 2.
\b
$ rio calc "(asarray (+ 125 (read 1)) (read 1) (read 1))" \\
> tests/data/shade.tif /tmp/out.tif
Produces a 3-band RGB GeoTIFF, with red levels incremented by 125,
from the single-band input.
The command above produces a 3-band RGB GeoTIFF, with red levels
incremented by 125, from the single-band input.
The maximum amount of memory used to perform calculations defaults to
64 MB. This number can be increased to improve speed of calculation.
"""
import numpy as np
......@@ -89,19 +139,36 @@ def calc(ctx, command, files, output, name, dtype, masked, overwrite,
with ctx.obj['env']:
output, files = resolve_inout(files=files, output=output,
overwrite=overwrite)
inputs = ([tuple(n.split('=')) for n in name] +
[(None, n) for n in files])
sources = [rasterio.open(path) for name, path in inputs]
with rasterio.open(inputs[0][1]) as first:
kwargs = first.meta
first = sources[0]
kwargs = first.profile
kwargs.update(**creation_options)
dtype = dtype or first.meta['dtype']
kwargs['dtype'] = dtype
# Extend snuggs.
snuggs.func_map['read'] = _read_array
snuggs.func_map['band'] = lambda d, i: _get_bands(inputs, sources, d, i)
snuggs.func_map['bands'] = lambda d: _get_bands(inputs, sources, d)
snuggs.func_map['fillnodata'] = lambda *args: fillnodata(*args)
snuggs.func_map['sieve'] = lambda *args: sieve(*args)
dst = None
# The windows iterator is initialized with a single sample.
# The actual work windows will be added in the second
# iteration of the loop.
work_windows = [(None, Window(0, 0, 16, 16))]
for ij, window in work_windows:
ctxkwds = OrderedDict()
for i, (name, path) in enumerate(inputs):
with rasterio.open(path) as src:
for i, ((name, path), src) in enumerate(zip(inputs, sources)):
# Using the class method instead of instance
# method. Latter raises
#
......@@ -110,16 +177,9 @@ def calc(ctx, command, files, output, name, dtype, masked, overwrite,
#
# possibly something to do with the instance being
# a masked array.
ctxkwds[name or '_i%d' % (i + 1)] = src.read(masked=masked)
ctxkwds[name or '_i%d' % (i + 1)] = src.read(masked=masked, window=window)
# Extend snuggs.
snuggs.func_map['read'] = read_array
snuggs.func_map['band'] = lambda d, i: get_bands(inputs, d, i)
snuggs.func_map['bands'] = lambda d: get_bands(inputs, d)
snuggs.func_map['fillnodata'] = lambda *args: fillnodata(*args)
snuggs.func_map['sieve'] = lambda *args: sieve(*args)
res = snuggs.eval(command, ctxkwds)
res = snuggs.eval(command, **ctxkwds)
if (isinstance(res, np.ma.core.MaskedArray) and (
tuple(LooseVersion(np.__version__).version) < (1, 9) or
......@@ -132,10 +192,16 @@ def calc(ctx, command, files, output, name, dtype, masked, overwrite,
results = np.asanyarray(
[np.ndarray.astype(res, dtype, copy=False)])
# The first iteration is only to get sample results and from them
# compute some properties of the output dataset.
if dst is None:
kwargs['count'] = results.shape[0]
dst = rasterio.open(output, 'w', **kwargs)
work_windows.extend(_chunk_output(dst.width, dst.height, dst.count, np.dtype(dst.dtypes[0]).itemsize, mem_limit=mem_limit))
with rasterio.open(output, 'w', **kwargs) as dst:
dst.write(results)
# In subsequent iterations we write results.
else:
dst.write(results, window=window)
except snuggs.ExpressionError as err:
click.echo("Expression Error:")
......@@ -143,3 +209,9 @@ def calc(ctx, command, files, output, name, dtype, masked, overwrite,
click.echo(' ' + ' ' * err.offset + "^")
click.echo(err)
raise click.Abort()
finally:
if dst:
dst.close()
for src in sources:
src.close()
......@@ -199,8 +199,6 @@ class AWSSession(Session):
if session:
self._session = session
elif aws_unsigned:
self._session = None
else:
self._session = boto3.Session(
aws_access_key_id=aws_access_key_id,
......@@ -258,7 +256,10 @@ class AWSSession(Session):
"""
if self.unsigned:
return {'AWS_NO_SIGN_REQUEST': 'YES'}
opts = {'AWS_NO_SIGN_REQUEST': 'YES'}
if 'aws_region' in self.credentials:
opts['AWS_REGION'] = self.credentials['aws_region']
return opts
else:
return {k.upper(): v for k, v in self.credentials.items()}
......
......@@ -260,7 +260,7 @@ def test_has_wkt_property():
def test_repr():
assert repr(CRS({'init': 'epsg:4326'})).startswith("CRS.from_dict(init")
assert repr(CRS({'init': 'epsg:4326'})).startswith("CRS.from_epsg(4326)")
def test_dunder_str():
......@@ -451,3 +451,13 @@ def test_linear_units():
def test_crs_copy():
    """copy.copy produces an equivalent CRS"""
    duplicate = copy.copy(CRS.from_epsg(3857))
    expected_prefix = 'PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84"'
    assert duplicate.wkt.startswith(expected_prefix)
def test_crs_hash():
    """hashes of equivalent CRS are equal"""
    first = CRS.from_epsg(3857)
    second = CRS.from_epsg(3857)
    assert hash(first) == hash(second)
def test_crs_hash_unequal():
    """hashes of non-equivalent CRS are not equal"""
    web_mercator = CRS.from_epsg(3857)
    wgs84 = CRS.from_epsg(4326)
    assert hash(web_mercator) != hash(wgs84)
......@@ -11,7 +11,8 @@ import pytest
import rasterio
from rasterio.enums import Compression
from rasterio.errors import RasterioIOError
from rasterio.errors import RasterioIOError, DatasetAttributeError
from rasterio.transform import Affine
def test_files(data):
......@@ -40,3 +41,40 @@ def test_dataset_compression(path_rgb_byte_tif, tag_value):
dataset.tags = MagicMock()
dataset.tags.return_value = {'COMPRESSION': tag_value}
assert dataset.compression == Compression(tag_value)
def test_untiled_dataset_blocksize(tmp_path):
    """Blocksize is not relevant to untiled datasets (see #1689)"""
    pytest.importorskip("pathlib")
    tmp_file = tmp_path / "test.tif"

    # Creating an untiled dataset with block sizes larger than its
    # dimensions must not raise.
    with rasterio.open(
            tmp_file, "w", driver="GTiff", count=1, height=13, width=13,
            dtype="uint8", crs="epsg:3857", transform=Affine.identity(),
            blockxsize=256, blockysize=256):
        pass

    # The oversized block sizes were ignored; the dataset is untiled
    # and has the requested shape.
    with rasterio.open(tmp_file) as dataset:
        assert not dataset.profile["tiled"]
        assert dataset.shape == (13, 13)
def test_tiled_dataset_blocksize_guard(tmp_path):
    """Tiled datasets with dimensions less than blocksize are not permitted"""
    pytest.importorskip("pathlib")
    tmp_file = tmp_path / "test.tif"
    creation_args = dict(
        driver="GTiff", count=1, height=13, width=13, dtype="uint8",
        crs="epsg:3857", transform=Affine.identity(), tiled=True,
        blockxsize=256, blockysize=256)

    with pytest.raises(ValueError):
        rasterio.open(tmp_file, "w", **creation_args)
def test_dataset_readonly_attributes(path_rgb_byte_tif):
    """Attempts to set read-only attributes fail with DatasetAttributeError"""
    # DatasetAttributeError is the custom exception raised when setting
    # attributes of a dataset opened in "r" mode (#1676).
    with pytest.raises(DatasetAttributeError):
        with rasterio.open(path_rgb_byte_tif) as dataset:
            dataset.crs = "foo"
def test_dataset_readonly_attributes_notimplementederror(path_rgb_byte_tif):
    """Attempts to set read-only attributes still fail with NotImplementedError.

    DatasetAttributeError derives from NotImplementedError, so callers
    catching the historical exception type keep working.

    Renamed: this function previously duplicated the name
    test_dataset_readonly_attributes, which shadowed the sibling test so
    only one of the two was ever collected and run by pytest.
    """
    with pytest.raises(NotImplementedError):
        with rasterio.open(path_rgb_byte_tif) as dataset:
            dataset.crs = "foo"