......@@ -2,11 +2,11 @@ environment:
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
matrix:
- TARGET_ARCH: x64
NPY: 1.15
NPY: 1.16
PY: 3.6
- TARGET_ARCH: x64
NPY: 1.15
NPY: 1.16
PY: 3.7
platform:
......@@ -25,20 +25,18 @@ install:
# Add path, activate `conda` and update conda.
- cmd: call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- cmd: conda.exe config --set always_yes yes --set changeps1 no --set show_channel_urls true
- cmd: conda.exe update conda
- cmd: conda.exe config --remove channels defaults --force
- cmd: conda.exe config --add channels conda-forge --force
- cmd: conda config --set always_yes yes --set changeps1 no --set show_channel_urls true
- cmd: conda update conda
- cmd: conda config --remove channels defaults --force
- cmd: conda config --add channels conda-forge --force
- cmd: set PYTHONUNBUFFERED=1
- cmd: conda.exe install conda-build vs2008_express_vc_python_patch
- cmd: conda install conda-build vs2008_express_vc_python_patch
- cmd: call setup_x64
- cmd: conda.exe create --name TEST python=%PY% numpy=%NPY% cython pip pytest hdf5 libnetcdf cftime
- cmd: conda create --name TEST python=%PY% numpy=%NPY% cython pip pytest hdf5 libnetcdf cftime
- cmd: conda info --all
- cmd: conda activate TEST
- cmd: conda.exe info --all
- cmd: conda.exe list
- cmd: echo [options] > setup.cfg
- cmd: echo [directories] >> setup.cfg
- cmd: echo HDF5_libdir = %CONDA_PREFIX%\Library\lib >> setup.cfg
......
......@@ -24,27 +24,51 @@ python:
matrix:
allow_failures:
- python: "3.8-dev"
- python: 3.7
env:
- MPI=1
- CC=mpicc.mpich
- DEPENDS="numpy>=1.9.0 cython>=0.21 setuptools>=18.0 mpi4py>=1.3.1 cftime"
- NETCDF_VERSION=GITMASTER
- NETCDF_DIR=$HOME
- PATH=${NETCDF_DIR}/bin:${PATH} # pick up nc-config here
include:
# Absolute minimum dependencies.
- python: 2.7
env:
- DEPENDS="numpy==1.9.0 cython==0.21 ordereddict==1.1 setuptools==18.0 cftime"
# test MPI
- python: 2.7
dist: trusty
# test MPI with latest released version
- python: 3.7
dist: xenial
env:
- MPI=1
- CC=mpicc
- CC=mpicc.mpich
- DEPENDS="numpy>=1.9.0 cython>=0.21 setuptools>=18.0 mpi4py>=1.3.1 cftime"
- NETCDF_VERSION=4.6.1
- NETCDF_VERSION=4.6.2
- NETCDF_DIR=$HOME
- PATH=${NETCDF_DIR}/bin:${PATH} # pick up nc-config here
addons:
apt:
packages:
- openmpi-bin
- libopenmpi-dev
- libhdf5-openmpi-dev
- mpich
- libmpich-dev
- libhdf5-mpich-dev
# test with netcdf-c from github master
- python: 3.7
dist: xenial
env:
- MPI=1
- CC=mpicc.mpich
- DEPENDS="numpy>=1.9.0 cython>=0.21 setuptools>=18.0 mpi4py>=1.3.1 cftime"
- NETCDF_VERSION=GITMASTER
- NETCDF_DIR=$HOME
- PATH=${NETCDF_DIR}/bin:${PATH} # pick up nc-config here
addons:
apt:
packages:
- mpich
- libmpich-dev
- libhdf5-mpich-dev
notifications:
email: false
......@@ -59,11 +83,17 @@ install:
- python setup.py install
script:
- cd test
- python run_all.py
- |
if [ $MPI -eq 1 ] ; then
cd examples
mpirun -np 4 python mpi_example.py
cd ..
cd ../examples
mpirun.mpich -np 4 python mpi_example.py
if [ $? -ne 0 ] ; then
echo "mpi test failed!"
exit 1
else
echo "mpi test passed!"
exit 0
fi
fi
- cd test
- python run_all.py
version 1.4.3 (tag v1.4.3rel)
=============================
* make set_always_mask work in MFDataset.
* fix saving diskless files to disk with netcdf-c >= 4.6.2.
* write to an in-memory Dataset, memoryview buffer returned by Dataset.close()
(issue #865, requires netcdf-c >= 4.6.2)
* fix performance regression when using large sequences of consecutive
integers for indexing with netcdf-c >= 4.6.2 (issue #870).
* improved error messages for ncinfo and other utilities (issue #873).
* fix for int64 attributes not being created for NETCDF3_64BIT_DATA (CDF5)
files (issue #878).
* fix for MPI parallel error ("NetCDF: Attempt to use feature that was not
turned on when netCDF was built") using netcdf-c 4.6.2 (issue #883).
* Added a `set_ncstring_attrs()` method to Dataset, Group and Variable that
forces all text attributes to be written as variable-length strings (netCDF
type NC_STRING - issue #882); see the sketch after this list.
* Allow parallel mode with NETCDF4_CLASSIC files (issue #890).
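
A minimal usage sketch of the new `set_ncstring_attrs()` behavior (the file name `ncstring_demo.nc` is hypothetical; a NETCDF4-format file is assumed, since NC_STRING attributes require it):

``` python
# Hedged sketch of set_ncstring_attrs() (file name is hypothetical).
from netCDF4 import Dataset

nc = Dataset("ncstring_demo.nc", "w")  # default format is NETCDF4
nc.set_ncstring_attrs(True)            # text attributes now written as NC_STRING
nc.title = "demo"                      # NC_STRING instead of the default NC_CHAR
v = nc.createVariable("x", "f4", ())
v.set_ncstring_attrs(True)             # per-Variable (and per-Group) control too
v.units = "m"                          # also NC_STRING
nc.close()
```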
version 1.4.2 (tag v1.4.2rel)
=============================
* add get_dims Variable method (issue #824)
......
Metadata-Version: 1.1
Name: netCDF4
Version: 1.4.2
Version: 1.4.3
Author: Jeff Whitaker
Author-email: jeffrey s whitaker at noaa gov
Home-page: https://github.com/Unidata/netcdf4-python
......
......@@ -8,6 +8,8 @@
## News
For details on the latest updates, see the [Changelog](https://github.com/Unidata/netcdf4-python/blob/master/Changelog).
03/05/2019: Version [1.4.3](https://pypi.python.org/pypi/netCDF4/1.4.3) released. Issues with netcdf-c 4.6.2 fixed (including broken parallel IO). `set_ncstring_attrs()` method added, memoryview buffer now returned when an in-memory Dataset is closed.
10/26/2018: Version [1.4.2](https://pypi.python.org/pypi/netCDF4/1.4.2) released. Minor bugfixes, added `Variable.get_dims()` method and `master_file` kwarg for `MFDataset.__init__`.
08/10/2018: Version [1.4.1](https://pypi.python.org/pypi/netCDF4/1.4.1) released. The old slicing behavior
......
......@@ -4,9 +4,18 @@ set -e
echo "Using downloaded netCDF version ${NETCDF_VERSION} with parallel capabilities enabled"
pushd /tmp
wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-${NETCDF_VERSION}.tar.gz
tar -xzvf netcdf-${NETCDF_VERSION}.tar.gz
pushd netcdf-${NETCDF_VERSION}
if [ ${NETCDF_VERSION} == "GITMASTER" ]; then
git clone http://github.com/Unidata/netcdf-c netcdf-c
pushd netcdf-c
autoreconf -i
else
wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-c-${NETCDF_VERSION}.tar.gz
tar -xzf netcdf-c-${NETCDF_VERSION}.tar.gz
pushd netcdf-c-${NETCDF_VERSION}
fi
# for Ubuntu xenial
export CPPFLAGS="-I/usr/include/hdf5/mpich"
export LIBS="-lhdf5_mpich_hl -lhdf5_mpich -lm -lz"
./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --disable-dap --enable-parallel
make -j 2
make install
......
netcdf4-python (1.4.2-2) UNRELEASED; urgency=medium
netcdf4-python (1.4.3-1~exp1) experimental; urgency=medium
* New upstream release.
* Bump Standards-Version to 4.3.0, no changes.
-- Bas Couwenberg <sebastic@debian.org> Tue, 25 Dec 2018 22:41:47 +0100
-- Bas Couwenberg <sebastic@debian.org> Tue, 05 Mar 2019 06:56:31 +0100
netcdf4-python (1.4.2-1) unstable; urgency=medium
......
......@@ -4,7 +4,7 @@ import numpy as np
from netCDF4 import Dataset
rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run)
nc = Dataset('parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD,
info=MPI.Info())
info=MPI.Info(),format='NETCDF4_CLASSIC')
# below should work also - MPI_COMM_WORLD and MPI_INFO_NULL will be used.
#nc = Dataset('parallel_test.nc', 'w', parallel=True)
d = nc.createDimension('dim',4)
......
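
A condensed sketch of the parallel-write pattern this example exercises (a sketch, assuming mpi4py and a parallel-enabled netcdf-c build; run under `mpirun -np 4`):

``` python
# Condensed sketch of examples/mpi_example.py after this change; run with
#   mpirun -np 4 python mpi_example.py
from mpi4py import MPI
import numpy as np
from netCDF4 import Dataset

rank = MPI.COMM_WORLD.rank
# issue #890: parallel mode now also works with NETCDF4_CLASSIC files.
nc = Dataset('parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD,
             info=MPI.Info(), format='NETCDF4_CLASSIC')
dim = nc.createDimension('dim', 4)
v = nc.createVariable('var', np.int32, ('dim',))
v[rank] = rank  # each of the 4 ranks writes its own element
nc.close()
```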
......@@ -320,3 +320,39 @@ statdat.set_auto_chartostring(False) # turn off auto-conversion
statdat[:] = data.view(station_data_t.dtype)
print(statdat[:]) # now structured array with char array subtype is returned
nc.close()
# create a diskless (in-memory) Dataset, and persist the file
# to disk when it is closed.
nc = Dataset('diskless_example.nc','w',diskless=True,persist=True)
d = nc.createDimension('x',None)
v = nc.createVariable('v',numpy.int32,'x')
v[0:5] = numpy.arange(5)
print(nc)
print(nc['v'][:])
nc.close() # file saved to disk
# create an in-memory dataset from an existing python memory
# buffer.
# read the newly created netcdf file into a python bytes object.
f = open('diskless_example.nc', 'rb')
nc_bytes = f.read(); f.close()
# create a netCDF in-memory dataset from the bytes object.
nc = Dataset('inmemory.nc', memory=nc_bytes)
print(nc)
print(nc['v'][:])
nc.close()
# create an in-memory Dataset and retrieve memory buffer
# estimated size is 1028 bytes - this is actually only
# used if format is NETCDF3 (ignored for NETCDF4/HDF5 files).
nc = Dataset('inmemory.nc', mode='w',memory=1028)
d = nc.createDimension('x',None)
v = nc.createVariable('v',numpy.int32,'x')
v[0:5] = numpy.arange(5)
nc_buf = nc.close() # close returns memoryview
print(type(nc_buf))
# save nc_buf to disk, read it back in and check.
f = open('inmemory.nc', 'wb')
f.write(nc_buf); f.close()
nc = Dataset('inmemory.nc')
print(nc)
print(nc['v'][:])
nc.close()
%% Cell type:markdown id: tags:
# Writing netCDF data
**Important Note**: when running this notebook interactively in a browser, you probably will not be able to execute individual cells out of order without getting an error. Instead, choose "Run All" from the Cell menu after you modify a cell.
%% Cell type:code id: tags:
``` python
import netCDF4 # Note: python is case-sensitive!
import numpy as np
```
%% Cell type:markdown id: tags:
## Opening a file, creating a new Dataset
Let's create a new, empty netCDF file named 'data/new.nc', opened for writing.
Be careful, opening a file with 'w' will clobber any existing data (unless `clobber=False` is used, in which case an exception is raised if the file already exists).
- `mode='r'` is the default.
- `mode='a'` opens an existing file and allows for appending (does not clobber existing data)
- `format` can be one of `NETCDF3_CLASSIC`, `NETCDF3_64BIT`, `NETCDF4_CLASSIC` or `NETCDF4` (default). `NETCDF4_CLASSIC` uses HDF5 for the underlying storage layer (as does `NETCDF4`) but enforces the classic netCDF 3 data model so data can be read with older clients.
%% Cell type:code id: tags:
``` python
try: ncfile.close() # just to be safe, make sure dataset is not already open.
except: pass
ncfile = netCDF4.Dataset('data/new.nc',mode='w',format='NETCDF4_CLASSIC')
print(ncfile)
```
%% Output
<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
dimensions(sizes):
variables(dimensions):
groups:
%% Cell type:markdown id: tags:
## Creating dimensions
The **ncfile** object we created is a container for _dimensions_, _variables_, and _attributes_. First, let's create some dimensions using the [`createDimension`](http://unidata.github.io/netcdf4-python/netCDF4.Dataset-class.html#createDimension) method.
- Every dimension has a name and a length.
- The name is a string that is used to specify the dimension to be used when creating a variable, and as a key to access the dimension object in the `ncfile.dimensions` dictionary.
Setting the dimension length to `0` or `None` makes it unlimited, so it can grow.
- For `NETCDF4` files, any variable's dimension can be unlimited.
- For `NETCDF4_CLASSIC` and `NETCDF3*` files, only one dimension per variable can be unlimited, and it must be the leftmost (slowest varying) dimension.
%% Cell type:code id: tags:
``` python
lat_dim = ncfile.createDimension('lat', 73) # latitude axis
lon_dim = ncfile.createDimension('lon', 144) # longitude axis
time_dim = ncfile.createDimension('time', None) # unlimited axis (can be appended to).
for dim in ncfile.dimensions.items():
print(dim)
```
%% Output
('lat', <type 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 73
)
('lon', <type 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 144
)
('time', <type 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 0
)
%% Cell type:markdown id: tags:
## Creating attributes
netCDF attributes can be set just as you would set attributes on any python object.
- Best to adhere to established conventions (like the [CF](http://cfconventions.org/) conventions)
- We won't try to adhere to any specific convention here though.
%% Cell type:code id: tags:
``` python
ncfile.title='My model data'
print(ncfile.title)
```
%% Output
My model data
%% Cell type:markdown id: tags:
Try adding some more attributes...
%% Cell type:markdown id: tags:
## Creating variables
Now let's add some variables and store some data in them.
- A variable has a name, a type, a shape, and some data values.
- The shape of a variable is specified by a tuple of dimension names.
- A variable should also have some named attributes, such as 'units', that describe the data.
The [`createVariable`](http://unidata.github.io/netcdf4-python/netCDF4.Dataset-class.html#createVariable) method takes 3 mandatory args.
- the 1st argument is the variable name (a string). This is used as the key to access the variable object from the `variables` dictionary.
- the 2nd argument is the datatype (most numpy datatypes supported).
- the third argument is a tuple containing the dimension names (the dimensions must be created first). Unless this is a `NETCDF4` file, any unlimited dimension must be the leftmost one.
- there are lots of optional arguments (many of which are only relevant when `format='NETCDF4'`) to control compression, chunking, fill_value, etc.
%% Cell type:code id: tags:
``` python
# Define two variables with the same names as dimensions,
# a conventional way to define "coordinate variables".
lat = ncfile.createVariable('lat', np.float32, ('lat',))
lat.units = 'degrees_north'
lat.long_name = 'latitude'
lon = ncfile.createVariable('lon', np.float32, ('lon',))
lon.units = 'degrees_east'
lon.long_name = 'longitude'
time = ncfile.createVariable('time', np.float64, ('time',))
time.units = 'hours since 1800-01-01'
time.long_name = 'time'
# Define a 3D variable to hold the data
temp = ncfile.createVariable('temp',np.float64,('time','lat','lon')) # note: unlimited dimension is leftmost
temp.units = 'K' # degrees Kelvin
temp.standard_name = 'air_temperature' # this is a CF standard name
print(temp)
```
%% Output
<type 'netCDF4._netCDF4.Variable'>
float64 temp(time, lat, lon)
units: K
standard_name: air_temperature
unlimited dimensions: time
current shape = (0, 73, 144)
filling on, default _FillValue of 9.96920996839e+36 used
%% Cell type:markdown id: tags:
## Pre-defined variable attributes (read only)
The netCDF4 module provides some useful pre-defined Python attributes for netCDF variables, such as dimensions, shape, dtype, ndim.
Note: since no data has been written yet, the length of the 'time' dimension is 0.
%% Cell type:code id: tags:
``` python
print("-- Some pre-defined attributes for variable temp:")
print("temp.dimensions:", temp.dimensions)
print("temp.shape:", temp.shape)
print("temp.dtype:", temp.dtype)
print("temp.ndim:", temp.ndim)
```
%% Output
-- Some pre-defined attributes for variable temp:
('temp.dimensions:', (u'time', u'lat', u'lon'))
('temp.shape:', (0, 73, 144))
('temp.dtype:', dtype('float64'))
('temp.ndim:', 3)
%% Cell type:markdown id: tags:
## Writing data
To write data a netCDF variable object, just treat it like a numpy array and assign values to a slice.
To write data to a netCDF variable object, just treat it like a numpy array and assign values to a slice.
%% Cell type:code id: tags:
``` python
nlats = len(lat_dim); nlons = len(lon_dim); ntimes = 3
# Write latitudes, longitudes.
# Note: the ":" is necessary in these "write" statements
lat[:] = -90. + (180./nlats)*np.arange(nlats) # south pole to north pole
lon[:] = (360./nlons)*np.arange(nlons) # Greenwich meridian eastward
# create a 3D array of random numbers
data_arr = np.random.uniform(low=280,high=330,size=(ntimes,nlats,nlons))
# Write the data. This writes the whole 3D netCDF variable all at once.
temp[:,:,:] = data_arr # Appends data along unlimited dimension
print("-- Wrote data, temp.shape is now ", temp.shape)
# read data back from variable (by slicing it), print min and max
print("-- Min/Max values:", temp[:,:,:].min(), temp[:,:,:].max())
```
%% Output
('-- Wrote data, temp.shape is now ', (3, 73, 144))
('-- Min/Max values:', 280.00283562143028, 329.99987991477548)
%% Cell type:markdown id: tags:
- You can just treat a netCDF Variable object like a numpy array and assign values to it.
- Variables automatically grow along unlimited dimensions (unlike numpy arrays)
- The above writes the whole 3D variable all at once, but you can write it a slice at a time instead.
Let's add another time slice....
%% Cell type:code id: tags:
``` python
# create a 2D array of random numbers
data_slice = np.random.uniform(low=280,high=330,size=(nlats,nlons))
temp[3,:,:] = data_slice # Appends the 4th time slice
print("-- Wrote more data, temp.shape is now ", temp.shape)
```
%% Output
('-- Wrote more data, temp.shape is now ', (4, 73, 144))
%% Cell type:markdown id: tags:
Note that we have not yet written any data to the time variable. It automatically grew as we appended data along the time dimension to the variable `temp`, but the data is missing.
%% Cell type:code id: tags:
``` python
print(time)
times_arr = time[:]
print(type(times_arr),times_arr) # dashes indicate masked values (where data has not yet been written)
```
%% Output
<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
units: hours since 1800-01-01
long_name: time
unlimited dimensions: time
current shape = (4,)
filling on, default _FillValue of 9.96920996839e+36 used
(<class 'numpy.ma.core.MaskedArray'>, masked_array(data = [-- -- -- --],
mask = [ True True True True],
fill_value = 9.96920996839e+36)
)
%% Cell type:markdown id: tags:
Let's write some data into the time variable.
- Given a set of datetime instances, use date2num to convert to numeric time values and then write that data to the variable.
%% Cell type:code id: tags:
``` python
from datetime import datetime
from netCDF4 import date2num,num2date
# 1st 4 days of October.
dates = [datetime(2014,10,1,0),datetime(2014,10,2,0),datetime(2014,10,3,0),datetime(2014,10,4,0)]
print(dates)
times = date2num(dates, time.units)
print(times, time.units) # numeric values
time[:] = times
# read time data back, convert to datetime instances, check values.
print(num2date(time[:],time.units))
```
%% Output
[datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 2, 0, 0), datetime.datetime(2014, 10, 3, 0, 0), datetime.datetime(2014, 10, 4, 0, 0)]
(array([ 1882440., 1882464., 1882488., 1882512.]), u'hours since 1800-01-01')
[datetime.datetime(2014, 10, 1, 0, 0) datetime.datetime(2014, 10, 2, 0, 0)
datetime.datetime(2014, 10, 3, 0, 0) datetime.datetime(2014, 10, 4, 0, 0)]
%% Cell type:markdown id: tags:
## Closing a netCDF file
It's **important** to close a netCDF file you opened for writing:
- flushes buffers to make sure all data gets written
- releases memory resources used by open netCDF files
%% Cell type:code id: tags:
``` python
# first print the Dataset object to see what we've got
print(ncfile)
# close the Dataset.
ncfile.close(); print('Dataset is closed!')
```
%% Output
<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
title: My model data
dimensions(sizes): lat(73), lon(144), time(4)
variables(dimensions): float32 lat(lat), float32 lon(lon), float64 time(time), float64 temp(time,lat,lon)
groups:
Dataset is closed!
%% Cell type:markdown id: tags:
# Advanced features
So far we've only exercised features associated with the old netCDF version 3 data model. netCDF version 4 adds a lot of new functionality that comes with the more flexible HDF5 storage layer.
Let's create a new file with `format='NETCDF4'` so we can try out some of these features.
%% Cell type:code id: tags:
``` python
ncfile = netCDF4.Dataset('data/new2.nc','w',format='NETCDF4')
print(ncfile)
```
%% Output
<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
dimensions(sizes):
variables(dimensions):
groups:
%% Cell type:markdown id: tags:
## Creating Groups
netCDF version 4 added support for organizing data in hierarchical groups.
- analogous to directories in a filesystem.
- Groups serve as containers for variables, dimensions and attributes, as well as other groups.
- A `netCDF4.Dataset` creates a special group, called the 'root group', which is similar to the root directory in a unix filesystem.
- groups are created using the [`createGroup`](http://unidata.github.io/netcdf4-python/netCDF4.Dataset-class.html#createGroup) method.
- takes a single argument (a string, which is the name of the Group instance). This string is used as a key to access the group instances in the `groups` dictionary.
Here we create two groups to hold data for two different model runs.
%% Cell type:code id: tags:
``` python
grp1 = ncfile.createGroup('model_run1')
grp2 = ncfile.createGroup('model_run2')
for grp in ncfile.groups.items():
print(grp)
```
%% Output
('model_run1', <type 'netCDF4._netCDF4.Group'>
group /model_run1:
dimensions(sizes):
variables(dimensions):
groups:
)
('model_run2', <type 'netCDF4._netCDF4.Group'>
group /model_run2:
dimensions(sizes):
variables(dimensions):
groups:
)
%% Cell type:markdown id: tags:
Create some dimensions in the root group.
%% Cell type:code id: tags:
``` python
lat_dim = ncfile.createDimension('lat', 73) # latitude axis
lon_dim = ncfile.createDimension('lon', 144) # longitude axis
time_dim = ncfile.createDimension('time', None) # unlimited axis (can be appended to).
```
%% Cell type:markdown id: tags:
Now create a variable in grp1 and grp2. The library will search recursively upwards in the group tree to find the dimensions (which in this case are defined one level up).
- These variables are created with **zlib compression**, another nifty feature of netCDF 4.
- The data are automatically compressed when data is written to the file, and uncompressed when the data is read.
- This can really save disk space, especially when used in conjunction with the [**least_significant_digit**](http://unidata.github.io/netcdf4-python/netCDF4.Dataset-class.html#createVariable) keyword argument, which causes the data to be quantized (truncated) before compression. This makes the compression lossy, but more efficient.
%% Cell type:code id: tags:
``` python
temp1 = grp1.createVariable('temp',np.float64,('time','lat','lon'),zlib=True)
temp2 = grp2.createVariable('temp',np.float64,('time','lat','lon'),zlib=True)
for grp in ncfile.groups.items(): # shows that each group now contains 1 variable
print(grp)
```
%% Output
('model_run1', <type 'netCDF4._netCDF4.Group'>
group /model_run1:
dimensions(sizes):
variables(dimensions): float64 temp(time,lat,lon)
groups:
)
('model_run2', <type 'netCDF4._netCDF4.Group'>
group /model_run2:
dimensions(sizes):
variables(dimensions): float64 temp(time,lat,lon)
groups:
)
%% Cell type:markdown id: tags:
## Creating a variable with a compound data type
- Compound data types map directly to numpy structured (a.k.a. 'record') arrays.
- Structured arrays are akin to C structs, or derived types in Fortran.
- They allow for the construction of table-like structures composed of combinations of other data types, including other compound types.
- Might be useful for representing multiple parameter values at each point on a grid, or at each time and space location for scattered (point) data.
Here we create a variable with a compound data type to represent complex data (there is no native complex data type in netCDF).
- The compound data type is created with the [`createCompoundType`](http://unidata.github.io/netcdf4-python/netCDF4.Dataset-class.html#createCompoundType) method.
%% Cell type:code id: tags:
``` python
# create complex128 numpy structured data type
complex128 = np.dtype([('real',np.float64),('imag',np.float64)])
# using this numpy dtype, create a netCDF compound data type object
# the string name can be used as a key to access the datatype from the cmptypes dictionary.
complex128_t = ncfile.createCompoundType(complex128,'complex128')
# create a variable with this data type, write some data to it.
cmplxvar = grp1.createVariable('cmplx_var',complex128_t,('time','lat','lon'))
# write some data to this variable
# first create some complex random data
nlats = len(lat_dim); nlons = len(lon_dim)
data_arr_cmplx = np.random.uniform(size=(nlats,nlons))+1.j*np.random.uniform(size=(nlats,nlons))
# write this complex data to a numpy complex128 structured array
data_arr = np.empty((nlats,nlons),complex128)
data_arr['real'] = data_arr_cmplx.real; data_arr['imag'] = data_arr_cmplx.imag
cmplxvar[0] = data_arr # write the data to the variable (appending to time dimension)
print(cmplxvar)
data_out = cmplxvar[0] # read one value of data back from variable
print(data_out.dtype, data_out.shape, data_out[0,0])
```
%% Output
<type 'netCDF4._netCDF4.Variable'>
compound cmplx_var(time, lat, lon)
compound data type: [('real', '<f8'), ('imag', '<f8')]
path = /model_run1
unlimited dimensions: time
current shape = (1, 73, 144)
(dtype([('real', '<f8'), ('imag', '<f8')]), (73, 144), (0.578177705604801, 0.18086070805676357))
%% Cell type:markdown id: tags:
## Creating a variable with a variable-length (vlen) data type
netCDF 4 has support for variable-length or "ragged" arrays. These are arrays of variable length sequences having the same type.
- To create a variable-length data type, use the [`createVLType`](http://unidata.github.io/netcdf4-python/netCDF4.Dataset-class.html#createVLType) method.
- The numpy datatype of the variable-length sequences and the name of the new datatype must be specified.
%% Cell type:code id: tags:
``` python
vlen_t = ncfile.createVLType(np.int64, 'phony_vlen')
```
%% Cell type:markdown id: tags:
A new variable can then be created using this datatype.
%% Cell type:code id: tags:
``` python
vlvar = grp2.createVariable('phony_vlen_var', vlen_t, ('time','lat','lon'))
```
%% Cell type:markdown id: tags:
Since there is no native vlen datatype in numpy, vlen arrays are represented in python as object arrays (arrays of dtype `object`).
- These are arrays whose elements are Python object pointers, and can contain any type of python object.
- For this application, they must contain 1-D numpy arrays all of the same type but of varying length.
- Fill with 1-D random numpy int64 arrays of random length between 1 and 10.
%% Cell type:code id: tags:
``` python
vlen_data = np.empty((nlats,nlons),object)
for i in range(nlons):
for j in range(nlats):
size = np.random.randint(1,10,size=1) # random length of sequence
vlen_data[j,i] = np.random.randint(0,10,size=size)# generate random sequence
vlvar[0] = vlen_data # append along unlimited dimension (time)
print(vlvar)
print('data =\n',vlvar[:])
```
%% Output
<type 'netCDF4._netCDF4.Variable'>
vlen phony_vlen_var(time, lat, lon)
vlen data type: int64
path = /model_run2
unlimited dimensions: time
current shape = (1, 73, 144)
('data =\n', array([[[array([0, 4, 0, 9, 2, 2, 2, 4, 2]), array([7, 5, 4, 4, 9, 8, 0]),
array([3, 6, 6, 8, 2, 7]), ..., array([5, 0, 0, 8, 8, 1, 5, 3]),
array([4, 2, 7]), array([0])],
[array([5, 6, 6, 6, 1, 0, 7]), array([7]),
array([7, 5, 8, 9, 6, 9, 3]), ..., array([0, 6, 5, 4]),
array([7, 1, 9, 7, 7, 2]), array([1, 4, 0])],
[array([4, 3, 1]), array([6, 3, 9, 7, 8]), array([8]), ...,
array([6, 5, 8, 0]), array([0]), array([0, 9, 6, 2, 4])],
...,
[array([8, 4, 4]), array([4, 1, 6]), array([1, 4, 2, 3, 9]), ...,
array([9, 1]), array([7, 2, 5, 1, 5, 8, 2]),
array([2, 9, 9, 1, 4, 6, 3, 5, 2])],
[array([4, 7, 9, 8, 2, 3, 6, 6]),
array([1, 4, 1, 6, 1, 1, 2, 3, 9]),
array([9, 5, 6, 2, 4, 3, 8, 2, 9]), ..., array([9, 5, 7]),
array([3, 9]), array([4, 2, 6, 9])],
[array([8, 9, 9, 2, 2, 8, 8, 5]), array([3]),
array([8, 8, 0, 2, 9, 2, 3, 0, 9]), ..., array([7]),
array([5, 1, 0, 6, 8, 6]), array([8, 6, 3, 6, 9, 8, 4, 2, 5])]]], dtype=object))
%% Cell type:markdown id: tags:
Close the Dataset and examine the contents with ncdump.
%% Cell type:code id: tags:
``` python
ncfile.close()
!ncdump -h data/new2.nc
```
%% Output
netcdf new2 {
types:
compound complex128 {
double real ;
double imag ;
}; // complex128
int64(*) phony_vlen ;
dimensions:
lat = 73 ;
lon = 144 ;
time = UNLIMITED ; // (1 currently)
group: model_run1 {
variables:
double temp(time, lat, lon) ;
complex128 cmplx_var(time, lat, lon) ;
} // group model_run1
group: model_run2 {
variables:
double temp(time, lat, lon) ;
phony_vlen phony_vlen_var(time, lat, lon) ;
} // group model_run2
}
%% Cell type:markdown id: tags:
## Other interesting and useful projects using netcdf4-python
- [Xray](http://xray.readthedocs.org/en/stable/): N-dimensional variant of the core [pandas](http://pandas.pydata.org) data structure that can operate on netcdf variables.
- [Iris](http://scitools.org.uk/iris/): a data model to create a data abstraction layer which isolates analysis and visualisation code from data format specifics. Uses netcdf4-python to access netcdf data (can also handle GRIB).
- [Biggus](https://github.com/SciTools/biggus): Virtual large arrays (from netcdf variables) with lazy evaluation.
- [cf-python](http://cfpython.bitbucket.org/): Implements the [CF](http://cfconventions.org) data model for the reading, writing and processing of data and metadata.
- [xarray](https://xarray.pydata.org/en/stable/): N-dimensional variant of the core [pandas](https://pandas.pydata.org) data structure that can operate on netcdf variables.
- [Iris](https://scitools.org.uk/iris/docs/latest/): a data model to create a data abstraction layer which isolates analysis and visualisation code from data format specifics. Uses netcdf4-python to access netcdf data (can also handle GRIB).
- [Dask](https://dask.org/): Virtual large arrays (from netcdf variables) with lazy evaluation.
- [cf-python](https://cfpython.bitbucket.io/): Implements the [CF](http://cfconventions.org) data model for the reading, writing and processing of data and metadata.
......
# Creates a memoryview from a malloced C pointer,
# which will be freed when the python object is garbage collected.
# Code found here is derived from
# http://stackoverflow.com/a/28166272/428751
from cpython.buffer cimport PyBuffer_FillInfo
from libc.stdlib cimport free
# create a python memoryview object from a raw pointer.
cdef memview_fromptr(void *memory, size_t size):
cdef _MemBuf buf = _MemBuf()
buf.memory = memory # malloced void pointer
buf.size = size # size of the buffer in bytes
return memoryview(buf)
# private extension type that implements the buffer protocol.
cdef class _MemBuf:
cdef const void *memory
cdef size_t size
def __getbuffer__(self, Py_buffer *buf, int flags):
PyBuffer_FillInfo(buf, self, <void *>self.memory, self.size, 1, flags)
def __releasebuffer__(self, Py_buffer *buf):
# why doesn't this do anything??
pass
def __dealloc__(self):
free(self.memory)
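
For illustration, a rough pure-Python analog of what `_MemBuf` achieves, with a ctypes buffer standing in for the malloced pointer:

``` python
# Illustration only: ctypes arrays expose the buffer protocol, so
# memoryview() can wrap foreign memory without copying, much as
# memview_fromptr wraps the malloced pointer above.
import ctypes

raw = (ctypes.c_char * 16)()      # stands in for the malloced C buffer
ctypes.memmove(raw, b"hello", 5)  # write into the raw memory
view = memoryview(raw)            # zero-copy view over it
print(bytes(view[:5]))            # b'hello'
```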
......@@ -694,6 +694,15 @@ IF HAS_NC_OPEN_MEM:
cdef extern from "netcdf_mem.h":
int nc_open_mem(const char *path, int mode, size_t size, void* memory, int *ncidp)
IF HAS_NC_CREATE_MEM:
cdef extern from "netcdf_mem.h":
int nc_create_mem(const char *path, int mode, size_t initialize, int *ncidp);
ctypedef struct NC_memio:
size_t size
void* memory
int flags
int nc_close_memio(int ncid, NC_memio* info);
IF HAS_NC_PAR:
cdef extern from "mpi-compat.h": pass
cdef extern from "netcdf_par.h":
......
......@@ -6,6 +6,6 @@ from ._netCDF4 import __doc__, __pdoc__
from ._netCDF4 import (__version__, __netcdf4libversion__, __hdf5libversion__,
__has_rename_grp__, __has_nc_inq_path__,
__has_nc_inq_format_extended__, __has_nc_open_mem__,
__has_cdf5_format__,__has_nc_par__)
__has_nc_create_mem__,__has_cdf5_format__,__has_nc_par__)
__all__ =\
['Dataset','Variable','Dimension','Group','MFDataset','MFTime','CompoundType','VLType','date2num','num2date','date2index','stringtochar','chartostring','stringtoarr','getlibversion','EnumType']
......@@ -86,7 +86,7 @@ least_significant_digit=1, bits will be 4.
return datout
def _StartCountStride(elem, shape, dimensions=None, grp=None, datashape=None,\
put=False, no_get_vars = True):
put=False, use_get_vars = False):
"""Return start, count, stride and indices needed to store/extract data
into/from a netCDF variable.
......@@ -257,7 +257,7 @@ Boolean array must have the same shape as the data along this dimension."""
newElem.append(e)
# slice or ellipsis object
elif type(e) == slice or type(e) == type(Ellipsis):
if no_get_vars and type(e) == slice and e.step not in [None,-1,1] and\
if not use_get_vars and type(e) == slice and e.step not in [None,-1,1] and\
dimensions is not None and grp is not None:
# convert strided slice to integer sequence if possible
# (this will avoid nc_get_vars, which is slow - issue #680).
......@@ -305,10 +305,10 @@ Boolean array must have the same shape as the data along this dimension."""
ee = range(start,stop,step)
except ValueError: # start, stop or step is not valid for a range
ee = False
if no_get_vars and ee and len(e) == len(ee) and (e == np.arange(start,stop,step)).all():
if ee and len(e) == len(ee) and (e == np.arange(start,stop,step)).all():
# don't convert to slice unless abs(stride) == 1
# (nc_get_vars is very slow, issue #680)
if step not in [1,-1]:
if not use_get_vars and step not in [1,-1]:
newElem.append(e)
else:
newElem.append(slice(start,stop,step))
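
A sketch of the indexing pattern this logic targets (file and variable names are hypothetical; see issue #680):

``` python
# Hypothetical demo of the strided-slice conversion (issue #680): a slice
# like v[::2] would map to the slow nc_get_vars, so where possible the
# library rewrites it as a consecutive-integer request served by nc_get_vara.
import numpy as np
from netCDF4 import Dataset

nc = Dataset("stride_demo.nc", "w")
nc.createDimension("x", 100)
v = nc.createVariable("v", np.float32, ("x",))
v[:] = np.arange(100, dtype=np.float32)
a = v[::2]                   # strided slice (step != 1)
b = v[np.arange(0, 100, 2)]  # the same request as an integer sequence
assert (a == b).all()
nc.close()
```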
......@@ -473,9 +473,9 @@ def ncinfo():
usage = """
Print summary information about a netCDF file.
usage: %s [-h] [-g grp or --group=grp] [-v var or --variable=var] [-d dim or --dimension=dim] filename
usage: %s [-h/--help] [-g grp or --group=grp] [-v var or --variable=var] [-d dim or --dimension=dim] filename
-h -- Print usage message.
-h/--help -- Print usage message.
-g <group name> or --group=<group name> -- Print info for this group
(default is root group). Nested groups specified
using posix paths ("group1/group2/group3").
......@@ -499,7 +499,7 @@ def ncinfo():
# Get the options
group = None; var = None; dim=None
for option in opts:
if option[0] == '-h':
if option[0] == '-h' or option[0] == '--help':
sys.stderr.write(usage)
sys.exit(0)
elif option[0] == '--group' or option[0] == '-g':
......@@ -514,7 +514,12 @@ def ncinfo():
sys.exit(0)
# filename passed as last argument
try:
filename = pargs[-1]
except IndexError:
sys.stdout.write("You need to pass netcdf filename!\n.")
sys.stderr.write(usage)
sys.exit(0)
f = Dataset(filename)
if group is None:
......@@ -609,8 +614,8 @@ def nc4tonc3():
usage = """
Convert a netCDF 4 file (in NETCDF4_CLASSIC format) to netCDF 3 format.
usage: %s [-h] [-o] [--chunk] netcdf4filename netcdf3filename
-h -- Print usage message.
usage: %s [-h/--help] [-o] [--chunk] netcdf4filename netcdf3filename
-h/--help -- Print usage message.
-o -- Overwrite destination file (default is to raise an error if output file already exists).
--quiet=(0|1) -- if 1, don't print diagnostic information.
--format -- netcdf3 format to use (NETCDF3_64BIT by default, can be set to NETCDF3_CLASSIC)
......@@ -636,7 +641,7 @@ def nc4tonc3():
# Get the options
for option in opts:
if option[0] == '-h':
if option[0] == '-h' or option[0] == '--help':
sys.stderr.write(usage)
sys.exit(0)
elif option[0] == '-o':
......@@ -793,8 +798,8 @@ def nc3tonc4():
to floats, and adding zlib compression (with the HDF5 shuffle filter and fletcher32 checksum).
Data may also be quantized (truncated) to a specified precision to improve compression.
usage: %s [-h] [-o] [--vars=var1,var2,..] [--zlib=(0|1)] [--complevel=(1-9)] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--unpackshort=(0|1)] [--quantize=var1=n1,var2=n2,..] netcdf3filename netcdf4filename
-h -- Print usage message.
usage: %s [-h/--help] [-o] [--vars=var1,var2,..] [--zlib=(0|1)] [--complevel=(1-9)] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--unpackshort=(0|1)] [--quantize=var1=n1,var2=n2,..] netcdf3filename netcdf4filename
-h/--help -- Print usage message.
-o -- Overwrite destination file (default is to raise an error if output file already exists).
--vars -- comma separated list of variable names to copy (default is to copy
all variables)
......@@ -859,7 +864,7 @@ def nc3tonc4():
# Get the options
for option in opts:
if option[0] == '-h':
if option[0] == '-h' or option[0] == '--help':
sys.stderr.write(usage)
sys.exit(0)
elif option[0] == '-o':
......
......@@ -55,6 +55,7 @@ def check_api(inc_dirs):
has_nc_inq_format_extended = False
has_cdf5_format = False
has_nc_open_mem = False
has_nc_create_mem = False
has_nc_par = False
for d in inc_dirs:
......@@ -76,6 +77,15 @@ def check_api(inc_dirs):
if line.startswith('#define NC_FORMAT_64BIT_DATA'):
has_cdf5_format = True
if has_nc_open_mem:
try:
f = open(os.path.join(d, 'netcdf_mem.h'), **open_kwargs)
except IOError:
continue
for line in f:
if line.startswith('EXTERNL int nc_create_mem'):
has_nc_create_mem = True
ncmetapath = os.path.join(d,'netcdf_meta.h')
if os.path.exists(ncmetapath):
for line in open(ncmetapath):
......@@ -84,7 +94,7 @@ def check_api(inc_dirs):
break
return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
has_cdf5_format, has_nc_open_mem, has_nc_par
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, has_nc_par
def getnetcdfvers(libdirs):
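
The detection is plain line scanning of the installed headers; a standalone sketch of the same idea (the include path is hypothetical):

``` python
# Standalone sketch of the header probe above (include path is hypothetical).
import os

def header_defines(inc_dir, header, prefix):
    """Return True if any line of inc_dir/header starts with prefix."""
    path = os.path.join(inc_dir, header)
    if not os.path.exists(path):
        return False
    with open(path) as f:
        return any(line.startswith(prefix) for line in f)

print(header_defines("/usr/include", "netcdf_mem.h",
                     "EXTERNL int nc_create_mem"))
```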
......@@ -478,7 +488,7 @@ if 'sdist' not in sys.argv[1:] and 'clean' not in sys.argv[1:]:
os.remove(netcdf4_src_c)
# this determines whether renameGroup and filepath methods will work.
has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \
has_cdf5_format, has_nc_open_mem, has_nc_par = check_api(inc_dirs)
has_cdf5_format, has_nc_open_mem, has_nc_create_mem, has_nc_par = check_api(inc_dirs)
# for netcdf 4.4.x CDF5 format is always enabled.
if netcdf_lib_version is not None and\
(netcdf_lib_version > "4.4" and netcdf_lib_version < "4.5"):
......@@ -520,6 +530,13 @@ if 'sdist' not in sys.argv[1:] and 'clean' not in sys.argv[1:]:
sys.stdout.write('netcdf lib does not have nc_open_mem function\n')
f.write('DEF HAS_NC_OPEN_MEM = 0\n')
if has_nc_create_mem:
sys.stdout.write('netcdf lib has nc_create_mem function\n')
f.write('DEF HAS_NC_CREATE_MEM = 1\n')
else:
sys.stdout.write('netcdf lib does not have nc_create_mem function\n')
f.write('DEF HAS_NC_CREATE_MEM = 0\n')
if has_cdf5_format:
sys.stdout.write('netcdf lib has cdf-5 format capability\n')
f.write('DEF HAS_CDF5_FORMAT = 1\n')
......@@ -553,7 +570,7 @@ else:
setup(name="netCDF4",
cmdclass=cmdclass,
version="1.4.2",
version="1.4.3",
long_description="netCDF version 4 has many features not found in earlier versions of the library, such as hierarchical groups, zlib compression, multiple unlimited dimensions, and new data types. It is implemented on top of HDF5. This module implements most of the new features, and can read and write netCDF files compatible with older versions of the library. The API is modelled after Scientific.IO.NetCDF, and should be familiar to users of that module.\n\nThis project is hosted on a `GitHub repository <https://github.com/Unidata/netcdf4-python>`_ where you may access the most up-to-date source.",
author="Jeff Whitaker",
author_email="jeffrey.s.whitaker@noaa.gov",
......
import glob, os, sys, unittest, struct
from netCDF4 import getlibversion,__hdf5libversion__,__netcdf4libversion__,__version__
from netCDF4 import __has_cdf5_format__, __has_nc_inq_path__, __has_nc_par__
from netCDF4 import __has_cdf5_format__, __has_nc_inq_path__, __has_nc_par__,\
__has_nc_create_mem__
# can also just run
# python -m unittest discover . 'tst*py'
......@@ -21,6 +22,9 @@ if __netcdf4libversion__ < '4.2.1' or __has_nc_par__:
if not __has_nc_inq_path__:
test_files.remove('tst_filepath.py')
sys.stdout.write('not running tst_filepath.py ...\n')
if not __has_nc_create_mem__:
test_files.remove('tst_create_mem.py')
sys.stdout.write('not running tst_create_mem.py ...\n')
if not __has_cdf5_format__ or struct.calcsize("P") < 8:
test_files.remove('tst_cdf5.py')
sys.stdout.write('not running tst_cdf5.py ...\n')
......@@ -29,6 +33,11 @@ if not __has_cdf5_format__ or struct.calcsize("P") < 8:
if os.getenv('NO_NET'):
test_files.remove('tst_dap.py');
sys.stdout.write('not running tst_dap.py ...\n')
else:
# run opendap test first (issue #856).
test_files.remove('tst_dap.py')
test_files.insert(0,'tst_dap.py')
# Build the test suite from the tests found in the test files.
testsuite = unittest.TestSuite()
......
......@@ -106,10 +106,23 @@ class VariablesTestCase(unittest.TestCase):
f.cafe = u'caf\xe9' # NC_STRING
f.batt = u'caf\xe9'.encode('utf-8') #NC_CHAR
v.setncattr_string('stringatt','bar') # NC_STRING
# issue #882 - provide an option to always write text attributes
# as NC_STRINGs. Testing various approaches to setting text attributes...
f.set_ncstring_attrs(True)
f.stringatt_ncstr = u'foo' # will now be written as NC_STRING
f.setncattr_string('stringatt_ncstr','bar') # NC_STRING anyway
f.caf_ncstr = u'caf\xe9' # NC_STRING anyway
f.bat_ncstr = u'caf\xe9'.encode('utf-8') # now NC_STRING
g.stratt_ncstr = STRATT # now NC_STRING
#g.renameAttribute('stratt_tmp','stratt_ncstr')
v.setncattr_string('stringatt_ncstr','bar') # NC_STRING anyway
v.stratt_ncstr = STRATT
v1.emptystratt_ncstr = EMPTYSTRATT
f.close()
def tearDown(self):
# Remove the temporary files
#pass
os.remove(self.file)
def runTest(self):
......@@ -170,10 +183,12 @@ class VariablesTestCase(unittest.TestCase):
ncdump_output = str(dep,encoding='utf-8').split('\n')
for line in ncdump_output:
line = line.strip('\t\n\r')
line = line.strip() # must be done a second time for group variables
if "stringatt" in line: assert line.startswith('string')
if "charatt" in line: assert line.startswith(':')
if "cafe" in line: assert line.startswith('string')
if "batt" in line: assert line.startswith(':')
if "_ncstr" in line: assert line.startswith('string')
# check attributes in subgroup.
# global attributes.
for key,val in ATTDICT.items():
......
......@@ -18,6 +18,8 @@ class test_cdf5(unittest.TestCase):
# create an 8-bit unsigned integer variable
v = nc.createVariable('var',np.uint8,'dim')
v[:ndim] = arrdata
# create a 64-bit integer attribute (issue #878)
nc.setncattr('int64_attr', np.int64(-9223372036854775806))
nc.close()
def tearDown(self):
......@@ -29,6 +31,7 @@ class test_cdf5(unittest.TestCase):
f = Dataset(self.netcdf_file, 'r')
assert f.dimensions['dim'].size == dimsize
assert_array_equal(arrdata, f.variables['var'][:ndim])
assert (type(f.int64_attr) == np.int64)
f.close()
if __name__ == '__main__':
......