Andreas Tille · Andreas Tille · 70ab21a1 · 70ab21a1 · 70ab21a1 · 70ab21a1
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ doc/_build
 *~
 *.pyc
 *.egg-info
+.eggs/
 nosetests.xml
 coverage.xml
 .coverage

--- a/.pylintrc
+++ b/.pylintrc
 [MASTER]
-extension-pkg-whitelist=numpy,xlml.etree,pysam.samtools,h5py.h5s
+extension-pkg-whitelist=numpy,lxml.etree,pysam.samtools

 [TYPECHECK]

@@ -7,7 +7,7 @@ extension-pkg-whitelist=numpy,xlml.etree,pysam.samtools,h5py.h5s
 # (useful for modules/projects where namespaces are manipulated during runtime
 # and thus existing member attributes cannot be deduced by static analysis. It
 # supports qualified module names, as well as Unix pattern matching.
-ignored-modules=numpy,lxml.etree,pysam.samtools,h5py.h5s
+ignored-modules=numpy,lxml.etree,pysam.samtools

 # List of classes names for which member attributes should not be checked
 # (useful for classes with attributes dynamically set). This supports can work

--- a/CHANGELOG.org
+++ b/CHANGELOG.org
+* Version 2.0
+  - Python 3.7 port, removed support for Python 2.x
+
 * Version 1.4.0
  - Support for Vcf4Records and merging VCF files


--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,11 @@ install:
 	@pip install ./

 pylint:
-	pylint --errors-only --ignore=pyxb pbcore/
+	pylint --errors-only --ignore=pyxb --enable=C0411,W0702,W0401,W0611 pbcore/
+
+autopep8:
+	find pbcore -name "*.py" | xargs autopep8 -i
+	find tests -name "*.py" | xargs autopep8 -i

 clean: doc-clean
 	rm -rf build/;\
@@ -33,8 +37,7 @@ doctest:
 	cd doc && make doctest

 unit-test:
-	#nosetests --with-coverage --cover-xml-file=coverage.xml --cover-package=pbcore --cover-xml --with-xunit -v tests
-	pytest -v -n auto --dist=loadscope --durations=20 --junitxml=nosetests.xml --cov=./pbcore --cov-report=xml:coverage.xml tests/test_*.py
+	python setup.py test
 	sed -i -e 's@filename="@filename="./@g' coverage.xml

 test: doctest unit-test

--- a/bamboo_build.sh
+++ b/bamboo_build.sh
 #!/bin/bash
 type module >& /dev/null || . /mnt/software/Modules/current/init/bash
-module load python/2
+module load python/3
+module load htslib  # since pysam was built against this
 set -ex
 nproc

@@ -18,13 +19,10 @@ else
  WHEELHOUSE=/mnt/software/p/python/wheelhouse/develop
 fi

-rm -rf   build
+rm -rf build
 mkdir -p build/bin build/lib build/include build/share
-$PIP install --user --no-index --find-link $WHEELHOUSE --no-compile -e .[test]
-$PIP install --user --no-index --find-link $WHEELHOUSE pbtestdata
-$PIP install --user --no-index --find-link $WHEELHOUSE pytest-xdist
-$PIP install --user --no-index --find-link $WHEELHOUSE pytest-cov
-#$PIP install --user --no-index --find-link $WHEELHOUSE pytest-parallel # not sure why this fails
+$PIP install --user --no-index --find-link $WHEELHOUSE --no-compile -e '.[test]'
+
 pytest --trace-config --collect-only

 set +e

--- a/bamboo_wheel.sh
+++ b/bamboo_wheel.sh
@@ -23,18 +23,9 @@ export WHEELHOUSE=./wheelhouse
 # Give everybody read/write access.
 umask 0000

-
-module load python/2-UCS2
-make wheel
-
-# For now, we have only "any" wheels, so we do not need to build again.
-
-module unload python
-
-module load python/2-UCS4
+module load python/3
 make wheel

-
 # http://bamboo.pacificbiosciences.com:8085/build/admin/edit/defaultBuildArtifact.action?buildKey=SAT-TAGDEPS-JOB1
 # For old artifact config:
 #mkdir -p ./artifacts/gcc-6.4.0/wheelhouse

--- a/bin/updateXSDs.py
+++ b/bin/updateXSDs.py
 #!/usr/bin/env python

-from __future__ import print_function
 import argparse
 import tempfile
 import shutil

--- a/doc/conf.py
+++ b/doc/conf.py
@@ -11,10 +11,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.

+import pkg_resources
 import sys, os

-# don't forget to update setup.py and pbcore/__init__.py too
-__VERSION__ = '1.7.1'
+try:
+    __VERSION__ = pkg_resources.get_distribution('pbcore').version
+except Exception:
+    __VERSION__ = 'unknown'


 # If extensions (or modules to document with autodoc) are in another directory,

--- a/doc/pbcore.io.dataset.rst
+++ b/doc/pbcore.io.dataset.rst
@@ -301,6 +301,7 @@ do them using the API, rather than the CLI.
    # open:
    dsets = [AlignmentSet(fn) for fn in filename_list]
    # merge with + operator:
+    from functools import reduce
    dset = reduce(lambda x, y: x + y, dsets)

    # OR:

--- a/doc/pbcore.io.rst
+++ b/doc/pbcore.io.rst
@@ -4,114 +4,46 @@ pbcore.io
 The ``pbcore.io`` package provides a number of lightweight interfaces
 to PacBio data files and other standard bioinformatics file formats.
 Preferred usage is to import classes directly from the ``pbcore.io``
-package, e.g.::
-
-    >>> from pbcore.io import CmpH5Reader
+package.

 The classes within ``pbcore.io`` adhere to a few conventions, in order
 to provide a uniform API:

  - Each data file type is thought of as a container of a `Record`
-    type; all `Reader` classes support streaming access by iterating on the 
-    reader object, and
-    `CmpH5Reader`, `BasH5Reader` and `IndexedBarReader` additionally 
-    provide random-access
-    to alignments/reads.
-    
+    type; all `Reader` classes support streaming access by iterating on the
+    reader object, and `IndexedBamReader` additionally provides
+    random-access to alignments/reads.
+
    For example::
-    
+
      from pbcore.io import *
      with IndexedBamReader(filename) as f:
        for r in f:
            process(r)
-    
+
    To make scripts a bit more user friendly, a progress bar can be
    easily added using the `tqdm` third-party package::
-    
+
      from pbcore.io import *
      from tqdm import tqdm
      with IndexedBamReader(filename) as f:
        for r in tqdm(f):
            process(r)
-    

  - The constructor argument needed to instantiate `Reader` and
    `Writer` objects can be either a filename (which can be suffixed
-    by ".gz" for all but the h5 file types) or an open file handle.
+    by ".gz" for all file types) or an open file handle.
    The reader/writer classes will do what you would expect.


-  - The reader/writer classes all support the context manager idiom.
-    Meaning, if you write::
-
-      >>> with CmpH5Reader("aligned_reads.cmp.h5") as r:
-      ...   print r[0].read()
-
-    the `CmpH5Reader` object will be automatically closed after the
-    block within the "with" statement is executed.
-
-BAM/cmp.h5 compatibility: quick start
-------------------------------------
-
-If you have an application that uses the `CmpH5Reader` and you want to
-start using BAM files, your best bet is to use the following generic
-factory functions:
-
-.. autofunction:: pbcore.io.openIndexedAlignmentFile
-
-.. autofunction:: pbcore.io.openAlignmentFile
-
-.. note::
-
-   Since BAM files contain a subset of the information that was
-   present in cmp.h5 files, you will need to provide these functions
-   an indexed FASTA file for your reference.  For *full*
-   compatibility, you need the `openIndexedAlignmentFile` function,
-   which requires the existence of a `bam.pbi` file (PacBio BAM index
-   companion file).
-
-
-
-
-`bas.h5` / `bax.h5` Formats (PacBio basecalls file)
---------------------------------------------------
-
-The `bas.h5`/ `bax.h5` file formats are container formats for PacBio
-reads, built on top of the HDF5 standard.  Originally there was just
-one `bas.h5`, but eventually "multistreaming" came along and we had to
-split the file into three `bax.h5` *parts* and one `bas.h5` file
-containing pointers to the *parts*.  Use ``BasH5Reader`` to read any
-kind of `bas.h5` file, and ``BaxH5Reader`` to read a `bax.h5`.
-
-.. note::
-
-    In contrast to GFF, for example, the `bas.h5` read coordinate
-    system is 0-based and start-inclusive/end-exclusive, i.e. the same
-    convention as Python and the C++ STL.
-
-.. autoclass:: pbcore.io.BasH5Reader
-    :members:
-    :undoc-members:
-
-.. autoclass:: pbcore.io.BasH5IO.Zmw
-    :members:
-    :undoc-members:
-
-.. autoclass:: pbcore.io.BasH5IO.ZmwRead
-    :members:
-    :undoc-members:
-
-
 BAM format
 ----------

 The BAM format is a standard format described aligned and unaligned
-reads.  PacBio is transitioning from the cmp.h5 format to the BAM
-format.  For basic functionality, one should use :class:`BamReader`;
-for full compatibility with the :class:`CmpH5Reader` API (including
-alignment index functionality) one should use
-:class:`IndexedBamReader`, which requires the auxiliary *PacBio BAM
-index file* (``bam.pbi`` file).
+reads.  PacBio uses the BAM format exclusively.
+For basic functionality, one should use :class:`BamReader`;
+use the :class:`IndexedBamReader` API for full index operation support,
+which requires the auxiliary *PacBio BAM index file* (``bam.pbi`` file).

 .. autoclass:: pbcore.io.BamAlignment
    :members:
@@ -126,29 +58,6 @@ index file* (``bam.pbi`` file).
    :undoc-members:


-
-`cmp.h5` format (legacy PacBio alignment file)
----------------------------------------------
-
-The `cmp.h5` file format is an alignment format built on top of the HDF5
-standard.  It is a simple container format for PacBio alignment records.
-
-.. note::
-
-    In contrast to GFF, for example, all `cmp.h5` coordinate systems
-    (refererence, read) are 0-based and start-inclusive/end-exclusive,
-    i.e. the same convention as Python and the C++ STL.
-
-
-.. autoclass:: pbcore.io.CmpH5Reader
-    :members:
-    :undoc-members:
-
-.. autoclass:: pbcore.io.CmpH5Alignment
-    :members:
-    :undoc-members:
-
-
 FASTA Format
 ------------


--- a/pbcore/__init__.py
+++ b/pbcore/__init__.py
-from __future__ import absolute_import
+import pkg_resources

-# don't forget to update setup.py and doc/conf.py too
-__VERSION__ = "1.7.1"
+try:
+    __VERSION__ = pkg_resources.get_distribution('pbcore').version
+except Exception:
+    __VERSION__ = 'unknown'
--- a/pbcore/chemistry/__init__.py
+++ b/pbcore/chemistry/__init__.py
-from __future__ import absolute_import
-
 from .chemistry import *
--- a/pbcore/chemistry/chemistry.py
+++ b/pbcore/chemistry/chemistry.py
-from __future__ import absolute_import
-
 __all__ = ["tripleFromMetadataXML",
           "decodeTriple",
-           "ChemistryLookupError" ]
+           "ChemistryLookupError"]

-import xml.etree.ElementTree as ET, os.path
-from pkg_resources import Requirement, resource_filename
 from collections import OrderedDict
+import xml.etree.ElementTree as ET
+import os.path
+
+from pkg_resources import Requirement, resource_filename
+
+
+class ChemistryLookupError(Exception):
+    pass

-class ChemistryLookupError(Exception): pass

 def _loadBarcodeMappingsFromFile(mapFile):
    try:
@@ -16,30 +19,38 @@ def _loadBarcodeMappingsFromFile(mapFile):
        root = tree.getroot()
        mappingElements = root.findall("Mapping")
        mappings = OrderedDict()
-        mapKeys = ["BindingKit", "SequencingKit", "SoftwareVersion", "SequencingChemistry"]
+        mapKeys = ["BindingKit", "SequencingKit",
+                   "SoftwareVersion", "SequencingChemistry"]
        for mapElement in mappingElements:
-            bindingKit          = mapElement.find("BindingKit").text
-            sequencingKit       = mapElement.find("SequencingKit").text
-            softwareVersion     = mapElement.find("SoftwareVersion").text
+            bindingKit = mapElement.find("BindingKit").text
+            sequencingKit = mapElement.find("SequencingKit").text
+            softwareVersion = mapElement.find("SoftwareVersion").text
            sequencingChemistry = mapElement.find("SequencingChemistry").text
-            mappings[(bindingKit, sequencingKit, softwareVersion)] = sequencingChemistry
+            mappings[(bindingKit, sequencingKit, softwareVersion)
+                     ] = sequencingChemistry
        return mappings
-    except:
+    except Exception:
        raise ChemistryLookupError("Error loading chemistry mapping xml")

+
 def _loadBarcodeMappings():
-    mappingFname = resource_filename(Requirement.parse('pbcore'),'pbcore/chemistry/resources/mapping.xml')
+    mappingFname = resource_filename(Requirement.parse(
+        'pbcore'), 'pbcore/chemistry/resources/mapping.xml')
    mappings = _loadBarcodeMappingsFromFile(mappingFname)
    updMappingDir = os.getenv("SMRT_CHEMISTRY_BUNDLE_DIR")
    if updMappingDir:
        import logging
        from os.path import join
-        logging.info("Loading updated chemistry mapping XML from {}".format(updMappingDir))
-        mappings.update(_loadBarcodeMappingsFromFile(join(updMappingDir, 'chemistry.xml')))
+        logging.info(
+            "Loading updated chemistry mapping XML from {}".format(updMappingDir))
+        mappings.update(_loadBarcodeMappingsFromFile(
+            join(updMappingDir, 'chemistry.xml')))
    return mappings

+
 _BARCODE_MAPPINGS = _loadBarcodeMappings()

+
 def tripleFromMetadataXML(metadataXmlPath):
    """
    Scrape the triple from the metadata.xml, or exception if the file
@@ -50,15 +61,20 @@ def tripleFromMetadataXML(metadataXmlPath):
    try:
        tree = ET.parse(metadataXmlPath)
        root = tree.getroot()
-        bindingKit = root.find("pb:BindingKit/pb:PartNumber", namespaces=nsd).text
-        sequencingKit = root.find("pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
+        bindingKit = root.find(
+            "pb:BindingKit/pb:PartNumber", namespaces=nsd).text
+        sequencingKit = root.find(
+            "pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
        # The instrument version is truncated to the first 2 dot delimited components
-        instrumentControlVersion = root.find("pb:InstCtrlVer", namespaces=nsd).text
+        instrumentControlVersion = root.find(
+            "pb:InstCtrlVer", namespaces=nsd).text
        verComponents = instrumentControlVersion.split(".")[0:2]
        instrumentControlVersion = ".".join(verComponents)
        return (bindingKit, sequencingKit, instrumentControlVersion)
    except Exception as e:
-        raise ChemistryLookupError("Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))
+        raise ChemistryLookupError(
+            "Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))
+

 def decodeTriple(bindingKit, sequencingKit, softwareVersion):
    """

--- a/pbcore/chemistry/resources/mapping.xml
+++ b/pbcore/chemistry/resources/mapping.xml
 <?xml version="1.0" encoding="utf-8"?>
 <MappingTable>
-  <DefaultSequencingChemistry>XL-C2</DefaultSequencingChemistry>
-  <Mapping>
-    <SequencingChemistry>C2</SequencingChemistry>
-    <BindingKit>001672551</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>1.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>C2</SequencingChemistry>
-    <BindingKit>001672551</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.0</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>C2</SequencingChemistry>
-    <BindingKit>001672551</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>C2</SequencingChemistry>
-    <BindingKit>001672551</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-C2</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>1.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-C2</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.0</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-C2</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-C2</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-XL</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>1.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-XL</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>2.0</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-XL</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>XL-XL</SequencingChemistry>
-    <BindingKit>100150800</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-C2</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>1.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-C2</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.0</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-C2</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-C2</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>001558034</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-XL</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>2.0</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-XL</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P4-XL</SequencingChemistry>
-    <BindingKit>100236500</BindingKit>
-    <SequencingKit>100180800</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P5-C3</SequencingChemistry>
-    <BindingKit>100256000</BindingKit>
-    <SequencingKit>100254800</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P5-C3</SequencingChemistry>
-    <BindingKit>100256000</BindingKit>
-    <SequencingKit>100254800</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100356300</BindingKit>
-    <SequencingKit>100356200</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100356300</BindingKit>
-    <SequencingKit>100356200</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100372700</BindingKit>
-    <SequencingKit>100356200</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100372700</BindingKit>
-    <SequencingKit>100356200</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100356300</BindingKit>
-    <SequencingKit>100612400</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100356300</BindingKit>
-    <SequencingKit>100612400</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100372700</BindingKit>
-    <SequencingKit>100612400</SequencingKit>
-    <SoftwareVersion>2.1</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>P6-C4</SequencingChemistry>
-    <BindingKit>100372700</BindingKit>
-    <SequencingKit>100612400</SequencingKit>
-    <SoftwareVersion>2.3</SoftwareVersion>
-  </Mapping>
-
-  <!-- 3.0 ("Dromedary") chemistry -->
-  <Mapping>
-    <SequencingChemistry>S/P1-C1/beta</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-620-000</SequencingKit>
-    <SoftwareVersion>3.0</SoftwareVersion>
-  </Mapping>
-  <Mapping>
-    <SequencingChemistry>S/P1-C1/beta</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-620-000</SequencingKit>
-    <SoftwareVersion>3.1</SoftwareVersion>
-  </Mapping>
-
-  <!-- 3.1 ("Echidna") chemistry -->
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.1</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-867-300</SequencingKit>
-    <SoftwareVersion>3.1</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.1</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-867-300</SequencingKit>
-    <SoftwareVersion>3.2</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.1</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-867-300</SequencingKit>
-    <SoftwareVersion>3.3</SoftwareVersion>
-  </Mapping>
-
-  <!-- 3.1.1 ("Flea") chemistry -->
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.2</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-902-100</SequencingKit>
-    <SoftwareVersion>3.1</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.2</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-902-100</SequencingKit>
-    <SoftwareVersion>3.2</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.2</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-902-100</SequencingKit>
-    <SoftwareVersion>3.3</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.2</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-902-100</SequencingKit>
-    <SoftwareVersion>4.0</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.2</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-902-100</SequencingKit>
-    <SoftwareVersion>4.1</SoftwareVersion>
-  </Mapping>
-
-  <!---3.2 ("Goat") chemistry (a.k.a "Mimosa") -->
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.3</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-972-200</SequencingKit>
-    <SoftwareVersion>3.2</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.3</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-972-200</SequencingKit>
-    <SoftwareVersion>3.3</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.3</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-972-200</SequencingKit>
-    <SoftwareVersion>4.0</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P1-C1.3</SequencingChemistry>
-    <BindingKit>100-619-300</BindingKit>
-    <SequencingKit>100-972-200</SequencingKit>
-    <SoftwareVersion>4.1</SoftwareVersion>
-  </Mapping>
-
-  <!-- "Seabiscuit" chemistry, accompanied 4.0 SW -->
-  <Mapping>
-    <SequencingChemistry>S/P2-C2</SequencingChemistry>
-    <BindingKit>100-862-200</BindingKit>
-    <SequencingKit>100-861-800</SequencingKit>
-    <SoftwareVersion>4.0</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P2-C2</SequencingChemistry>
-    <BindingKit>100-862-200</BindingKit>
-    <SequencingKit>100-861-800</SequencingKit>
-    <SoftwareVersion>4.1</SoftwareVersion>
-  </Mapping>
-
-  <Mapping>
-    <SequencingChemistry>S/P2-C2</SequencingChemistry>
-    <BindingKit>100-862-200</BindingKit>
-    <SequencingKit>101-093-700</SequencingKit>
-    <SoftwareVersion>4.1</SoftwareVersion>
-  </Mapping>
+  <DefaultSequencingChemistry>S/P3-C1/5.0-8M</DefaultSequencingChemistry>

  <Mapping>
    <SequencingChemistry>S/P2-C2/5.0</SequencingChemistry>
@@ -449,4 +152,52 @@
    <SoftwareVersion>5.0</SoftwareVersion>
  </Mapping>

+  <!-- // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0EA (4 Rxn) -->
+  <Mapping>
+    <SequencingChemistry>S/P4-C2/5.0-8M</SequencingChemistry>
+    <BindingKit>101-789-500</BindingKit>
+    <SequencingKit>101-789-300</SequencingKit>
+    <SoftwareVersion>5.0</SoftwareVersion>
+  </Mapping>
+
+  <!-- // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0 (4 Rxn) -->
+  <Mapping>
+    <SequencingChemistry>S/P4-C2/5.0-8M</SequencingChemistry>
+    <BindingKit>101-789-500</BindingKit>
+    <SequencingKit>101-826-100</SequencingKit>
+    <SoftwareVersion>5.0</SoftwareVersion>
+  </Mapping>
+
+  <!-- // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC -->
+  <Mapping>
+    <SequencingChemistry>S/P4-C2/5.0-8M</SequencingChemistry>
+    <BindingKit>101-789-500</BindingKit>
+    <SequencingKit>101-820-300</SequencingKit>
+    <SoftwareVersion>5.0</SoftwareVersion>
+  </Mapping>
+
+  <!-- // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0EA (4 Rxn) -->
+  <Mapping>
+    <SequencingChemistry>S/P4.1-C2/5.0-8M</SequencingChemistry>
+    <BindingKit>101-820-500</BindingKit>
+    <SequencingKit>101-789-300</SequencingKit>
+    <SoftwareVersion>5.0</SoftwareVersion>
+  </Mapping>
+
+  <!-- // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0 (4 Rxn) -->
+  <Mapping>
+    <SequencingChemistry>S/P4.1-C2/5.0-8M</SequencingChemistry>
+    <BindingKit>101-820-500</BindingKit>
+    <SequencingKit>101-826-100</SequencingKit>
+    <SoftwareVersion>5.0</SoftwareVersion>
+  </Mapping>
+
+  <!-- // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC -->
+  <Mapping>
+    <SequencingChemistry>S/P4.1-C2/5.0-8M</SequencingChemistry>
+    <BindingKit>101-820-500</BindingKit>
+    <SequencingKit>101-820-300</SequencingKit>
+    <SoftwareVersion>5.0</SoftwareVersion>
+  </Mapping>
+
 </MappingTable>
--- a/pbcore/data/1.4_bas_files.fofn
+++ b/pbcore/data/1.4_bas_files.fofn
-m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5
-m110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5
--- a/pbcore/data/2.0_bax_files.fofn
+++ b/pbcore/data/2.0_bax_files.fofn
-m130522_092457_42208_c100497142550000001823078008081323_s1_p0.1.bax.h5
-m130522_092457_42208_c100497142550000001823078008081323_s1_p0.2.bax.h5
-m130522_092457_42208_c100497142550000001823078008081323_s1_p0.3.bax.h5
--- a/pbcore/data/2.1_bax_files.fofn
+++ b/pbcore/data/2.1_bax_files.fofn
-m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.bax.h5
-m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.bax.h5
-m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.bax.h5
--- a/pbcore/data/2.1_ccs_files.fofn
+++ b/pbcore/data/2.1_ccs_files.fofn
-m130727_114215_42211_c100569412550000001823090301191423_s1_p0.1.ccs.h5
--- a/pbcore/data/2.3_bax_files.fofn
+++ b/pbcore/data/2.3_bax_files.fofn
-m140912_020930_00114_c100702482550000001823141103261590_s1_p0.1.bax.h5
-m140912_020930_00114_c100702482550000001823141103261590_s1_p0.2.bax.h5
-m140912_020930_00114_c100702482550000001823141103261590_s1_p0.3.bax.h5
--- a/pbcore/data/Makefile
+++ b/pbcore/data/Makefile
-MOVIE1 := m140905_042212_sidney_c100564852550000001823085912221377_s1_X0
-
-all: $(MOVIE1).subreads.bam $(MOVIE1).aligned_subreads.cmp.h5 $(MOVIE1).aligned_subreads.bam testdata upstream_testdata
-
-$(MOVIE1).subreads.bam: $(MOVIE1).1.bax.h5
-	bax2bam $<
-	samtools index $@
-	pbindex $@
-
-$(MOVIE1).aligned_subreads.bam: $(MOVIE1).subreads.bam lambdaNEB.fa
-	pbalign $^ $@
-	samtools index $@
-	pbindex $@
-
-$(MOVIE1).aligned_subreads.cmp.h5: $(MOVIE1).1.bax.h5 lambdaNEB.fa
-	pbalign-2.3 --forQuiver --metrics IPD,DeletionQV,DeletionTag,InsertionQV,MergeQV,SubstitutionQV $^ $@
-
-testdata:
-	dataset.py create --type ReferenceSet --relative datasets/pbalchemysim0.referenceset.xml datasets/pbalchemysim0.reference.fasta
-	bax2bam datasets/pbalchemysim0.bas.h5 -o datasets/pbalchemysim0 --pulsefeatures DeletionTag,DeletionQV,InsertionQV,MergeQV,SubstitutionQV && \
-	samtools index datasets/pbalchemysim0.subreads.bam
-	pbindex datasets/pbalchemysim0.subreads.bam
-	dataset.py create --type SubreadSet --relative datasets/pbalchemysim0.subreadset.xml datasets/pbalchemysim0.subreads.bam
-	pbalign datasets/pbalchemysim0.subreads.bam datasets/pbalchemysim0.reference.fasta datasets/pbalchemysim0.pbalign.bam && \
-	dataset.py create --type AlignmentSet --relative datasets/pbalchemysim0.alignmentset.xml datasets/pbalchemysim0.pbalign.bam
-	dataset.py split --contigs --chunks 2 datasets/pbalchemysim0.alignmentset.xml
-	dataset.py create --type AlignmentSet --relative datasets/pbalchemysim0.alignmentset.chunk0contigs.xml datasets/pbalchemysim0.alignmentset.chunk0contigs.xml
-	dataset.py create --type AlignmentSet --relative datasets/pbalchemysim0.alignmentset.chunk1contigs.xml datasets/pbalchemysim0.alignmentset.chunk1contigs.xml
-	bax2bam datasets/pbalchemysim1.bas.h5 -o datasets/pbalchemysim1 --pulsefeatures DeletionTag,DeletionQV,InsertionQV,MergeQV,SubstitutionQV && \
-	samtools index datasets/pbalchemysim1.subreads.bam
-	pbindex datasets/pbalchemysim1.subreads.bam
-	dataset.py create --type SubreadSet --relative datasets/pbalchemysim1.subreadset.xml datasets/pbalchemysim1.subreads.bam
-	pbalign datasets/pbalchemysim1.subreads.bam datasets/pbalchemysim0.reference.fasta datasets/pbalchemysim1.pbalign.bam && \
-	dataset.py create --type AlignmentSet --relative datasets/pbalchemysim1.alignmentset.xml datasets/pbalchemysim1.pbalign.bam
-	dataset.py create --type AlignmentSet --relative datasets/pbalchemysim.alignmentset.xml datasets/pbalchemysim0.alignmentset.xml datasets/pbalchemysim1.alignmentset.xml
-	dataset.py create --type SubreadSet --relative datasets/subreadSetWithStats.xml datasets/pbalchemysim0.subreadset.xml
-	dataset.py loadstats datasets/subreadSetWithStats.xml datasets/m150430_142051_Mon_p1_b25.sts.xml
-	dataset.py loadstats datasets/subreadSetWithStats.xml datasets/m150616_053259_ethan_c100710482550000001823136404221563_s1_p0.sts.xml
-	dataset.py create --type SubreadSet --relative datasets/subreadSetWithStats.xml datasets/subreadSetWithStats.xml
-	dataset.py create --type HdfSubreadSet --relative datasets/pbalchemysim0.hdfsubreadset.xml datasets/pbalchemysim0.bas.h5
-	dataset.py create --type HdfSubreadSet --relative datasets/pbalchemysim1.hdfsubreadset.xml datasets/pbalchemysim1.bas.h5
-	dataset.py create --type HdfSubreadSet --relative datasets/pbalchemysim.hdfsubreadset.xml datasets/pbalchemysim0.bas.h5 datasets/pbalchemysim1.bas.h5
-
-upstream_testdata:
-	dataset.py create --type ReferenceSet --relative datasets/lambda.referenceset.xml lambdaNEB.fa
-	dataset.py create --type AlignmentSet --relative datasets/lambda.alignmentset.xml $(MOVIE1).aligned_subreads.bam
-
-xsdupdate:
-	cp ../../../../../doc/FileFormats/examples/datasets/*.xml datasets/.
-