Skip to content
Commits on Source (2)
......@@ -4,6 +4,7 @@ doc/_build
*~
*.pyc
*.egg-info
.eggs/
nosetests.xml
coverage.xml
.coverage
......
[MASTER]
extension-pkg-whitelist=numpy,xlml.etree,pysam.samtools,h5py.h5s
extension-pkg-whitelist=numpy,xlml.etree,pysam.samtools
[TYPECHECK]
......@@ -7,7 +7,7 @@ extension-pkg-whitelist=numpy,xlml.etree,pysam.samtools,h5py.h5s
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=numpy,lxml.etree,pysam.samtools,h5py.h5s
ignored-modules=numpy,lxml.etree,pysam.samtools
# List of classes names for which member attributes should not be checked
# (useful for classes with attributes dynamically set). This supports can work
......
* Version 2.0
- Python 3.7 port, removed support for Python 2.x
* Version 1.4.0
- Support for Vcf4Records and merging VCF files
......
......@@ -15,7 +15,11 @@ install:
@pip install ./
# Lint pbcore/ (ignoring pyxb). Only errors are reported, plus the explicitly
# re-enabled checks: C0411 (wrong-import-order), W0702 (bare-except),
# W0401 (wildcard-import) and W0611 (unused-import).
# A single invocation suffices: a plain `--errors-only` run reports a strict
# subset of what this command reports.
pylint:
	pylint --errors-only --ignore=pyxb --enable=C0411,W0702,W0401,W0611 pbcore/
# Reformat every *.py file under pbcore/ and tests/ in place with autopep8.
# `find ... -exec ... {} +` is used instead of piping into xargs so that paths
# containing whitespace are passed intact and autopep8 is not invoked at all
# when no file matches.
autopep8:
	find pbcore -name "*.py" -exec autopep8 -i {} +
	find tests -name "*.py" -exec autopep8 -i {} +
clean: doc-clean
rm -rf build/;\
......@@ -33,8 +37,7 @@ doctest:
cd doc && make doctest
# Run the unit tests, then post-process coverage.xml.
# NOTE(review): both a pytest invocation and `python setup.py test` are listed
# here, next to a commented-out nosetests command; presumably only one runner
# is intended -- confirm before relying on this target.
unit-test:
#nosetests --with-coverage --cover-xml-file=coverage.xml --cover-package=pbcore --cover-xml --with-xunit -v tests
pytest -v -n auto --dist=loadscope --durations=20 --junitxml=nosetests.xml --cov=./pbcore --cov-report=xml:coverage.xml tests/test_*.py
python setup.py test
# Rewrite coverage.xml in place so every `filename="` attribute value gains a
# leading "./".
sed -i -e 's@filename="@filename="./@g' coverage.xml
# Aggregate target: runs the doctest target and the unit-test target.
test: doctest unit-test
......
#!/bin/bash
type module >& /dev/null || . /mnt/software/Modules/current/init/bash
module load python/2
module load python/3
module load htslib # since pysam was built against this
set -ex
nproc
......@@ -18,13 +19,10 @@ else
WHEELHOUSE=/mnt/software/p/python/wheelhouse/develop
fi
rm -rf build
rm -rf build
mkdir -p build/bin build/lib build/include build/share
$PIP install --user --no-index --find-link $WHEELHOUSE --no-compile -e .[test]
$PIP install --user --no-index --find-link $WHEELHOUSE pbtestdata
$PIP install --user --no-index --find-link $WHEELHOUSE pytest-xdist
$PIP install --user --no-index --find-link $WHEELHOUSE pytest-cov
#$PIP install --user --no-index --find-link $WHEELHOUSE pytest-parallel # not sure why this fails
$PIP install --user --no-index --find-link $WHEELHOUSE --no-compile -e '.[test]'
pytest --trace-config --collect-only
set +e
......
......@@ -23,18 +23,9 @@ export WHEELHOUSE=./wheelhouse
# Give everybody read/write access.
umask 0000
module load python/2-UCS2
make wheel
# For now, we have only "any" wheels, so we do not need to build again.
module unload python
module load python/2-UCS4
module load python/3
make wheel
# http://bamboo.pacificbiosciences.com:8085/build/admin/edit/defaultBuildArtifact.action?buildKey=SAT-TAGDEPS-JOB1
# For old artifact config:
#mkdir -p ./artifacts/gcc-6.4.0/wheelhouse
......
#!/usr/bin/env python
from __future__ import print_function
import argparse
import tempfile
import shutil
......
......@@ -11,10 +11,13 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
import pkg_resources
import sys, os
# Read the version from the installed 'pbcore' distribution metadata rather
# than a hard-coded string; fall back to 'unknown' if the lookup raises.
try:
    __VERSION__ = pkg_resources.get_distribution('pbcore').version
except Exception:
    __VERSION__ = 'unknown'
# If extensions (or modules to document with autodoc) are in another directory,
......
......@@ -301,6 +301,7 @@ do them using the API, rather than the CLI.
# open:
dsets = [AlignmentSet(fn) for fn in filename_list]
# merge with + operator:
from functools import reduce
dset = reduce(lambda x, y: x + y, dsets)
# OR:
......
......@@ -4,114 +4,46 @@ pbcore.io
The ``pbcore.io`` package provides a number of lightweight interfaces
to PacBio data files and other standard bioinformatics file formats.
Preferred usage is to import classes directly from the ``pbcore.io``
package, e.g.::
>>> from pbcore.io import CmpH5Reader
package.
The classes within ``pbcore.io`` adhere to a few conventions, in order
to provide a uniform API:
- Each data file type is thought of as a container of a `Record`
type; all `Reader` classes support streaming access by iterating on the
reader object, and
`CmpH5Reader`, `BasH5Reader` and `IndexedBamReader` additionally
provide random-access
to alignments/reads.
type; all `Reader` classes support streaming access by iterating on the
reader object, and `IndexedBamReader` additionally provides
random-access to alignments/reads.
For example::
from pbcore.io import *
with IndexedBamReader(filename) as f:
for r in f:
process(r)
To make scripts a bit more user friendly, a progress bar can be
easily added using the `tqdm` third-party package::
from pbcore.io import *
from tqdm import tqdm
with IndexedBamReader(filename) as f:
for r in tqdm(f):
process(r)
- The constructor argument needed to instantiate `Reader` and
`Writer` objects can be either a filename (which can be suffixed
by ".gz" for all but the h5 file types) or an open file handle.
by ".gz" for all file types) or an open file handle.
The reader/writer classes will do what you would expect.
- The reader/writer classes all support the context manager idiom.
Meaning, if you write::
>>> with CmpH5Reader("aligned_reads.cmp.h5") as r:
... print r[0].read()
the `CmpH5Reader` object will be automatically closed after the
block within the "with" statement is executed.
BAM/cmp.h5 compatibility: quick start
-------------------------------------
If you have an application that uses the `CmpH5Reader` and you want to
start using BAM files, your best bet is to use the following generic
factory functions:
.. autofunction:: pbcore.io.openIndexedAlignmentFile
.. autofunction:: pbcore.io.openAlignmentFile
.. note::
Since BAM files contain a subset of the information that was
present in cmp.h5 files, you will need to provide these functions
an indexed FASTA file for your reference. For *full*
compatibility, you need the `openIndexedAlignmentFile` function,
which requires the existence of a `bam.pbi` file (PacBio BAM index
companion file).
`bas.h5` / `bax.h5` Formats (PacBio basecalls file)
---------------------------------------------------
The `bas.h5`/ `bax.h5` file formats are container formats for PacBio
reads, built on top of the HDF5 standard. Originally there was just
one `bas.h5`, but eventually "multistreaming" came along and we had to
split the file into three `bax.h5` *parts* and one `bas.h5` file
containing pointers to the *parts*. Use ``BasH5Reader`` to read any
kind of `bas.h5` file, and ``BaxH5Reader`` to read a `bax.h5`.
.. note::
In contrast to GFF, for example, the `bas.h5` read coordinate
system is 0-based and start-inclusive/end-exclusive, i.e. the same
convention as Python and the C++ STL.
.. autoclass:: pbcore.io.BasH5Reader
:members:
:undoc-members:
.. autoclass:: pbcore.io.BasH5IO.Zmw
:members:
:undoc-members:
.. autoclass:: pbcore.io.BasH5IO.ZmwRead
:members:
:undoc-members:
BAM format
----------
The BAM format is a standard format describing aligned and unaligned
reads. PacBio is transitioning from the cmp.h5 format to the BAM
format. For basic functionality, one should use :class:`BamReader`;
for full compatibility with the :class:`CmpH5Reader` API (including
alignment index functionality) one should use
:class:`IndexedBamReader`, which requires the auxiliary *PacBio BAM
index file* (``bam.pbi`` file).
reads. PacBio uses the BAM format exclusively.
For basic functionality, one should use :class:`BamReader`;
use the :class:`IndexedBamReader` API for full index operation support,
which requires the auxiliary *PacBio BAM index file* (``bam.pbi`` file).
.. autoclass:: pbcore.io.BamAlignment
:members:
......@@ -126,29 +58,6 @@ index file* (``bam.pbi`` file).
:undoc-members:
`cmp.h5` format (legacy PacBio alignment file)
----------------------------------------------
The `cmp.h5` file format is an alignment format built on top of the HDF5
standard. It is a simple container format for PacBio alignment records.
.. note::
In contrast to GFF, for example, all `cmp.h5` coordinate systems
(reference, read) are 0-based and start-inclusive/end-exclusive,
i.e. the same convention as Python and the C++ STL.
.. autoclass:: pbcore.io.CmpH5Reader
:members:
:undoc-members:
.. autoclass:: pbcore.io.CmpH5Alignment
:members:
:undoc-members:
FASTA Format
------------
......
from __future__ import absolute_import
import pkg_resources
# Read the version from the installed 'pbcore' distribution metadata rather
# than a hard-coded string; fall back to 'unknown' if the lookup raises.
try:
    __VERSION__ = pkg_resources.get_distribution('pbcore').version
except Exception:
    __VERSION__ = 'unknown'
from __future__ import absolute_import
from .chemistry import *
from __future__ import absolute_import
# Public names exported by `from <this module> import *`.
__all__ = ["tripleFromMetadataXML",
           "decodeTriple",
           "ChemistryLookupError"]
import xml.etree.ElementTree as ET, os.path
from pkg_resources import Requirement, resource_filename
from collections import OrderedDict
import xml.etree.ElementTree as ET
import os.path
from pkg_resources import Requirement, resource_filename
class ChemistryLookupError(Exception):
    """Raised when chemistry information cannot be loaded or extracted."""
    pass
def _loadBarcodeMappingsFromFile(mapFile):
try:
......@@ -16,30 +19,38 @@ def _loadBarcodeMappingsFromFile(mapFile):
root = tree.getroot()
mappingElements = root.findall("Mapping")
mappings = OrderedDict()
mapKeys = ["BindingKit", "SequencingKit", "SoftwareVersion", "SequencingChemistry"]
mapKeys = ["BindingKit", "SequencingKit",
"SoftwareVersion", "SequencingChemistry"]
for mapElement in mappingElements:
bindingKit = mapElement.find("BindingKit").text
sequencingKit = mapElement.find("SequencingKit").text
softwareVersion = mapElement.find("SoftwareVersion").text
bindingKit = mapElement.find("BindingKit").text
sequencingKit = mapElement.find("SequencingKit").text
softwareVersion = mapElement.find("SoftwareVersion").text
sequencingChemistry = mapElement.find("SequencingChemistry").text
mappings[(bindingKit, sequencingKit, softwareVersion)] = sequencingChemistry
mappings[(bindingKit, sequencingKit, softwareVersion)
] = sequencingChemistry
return mappings
except:
except Exception:
raise ChemistryLookupError("Error loading chemistry mapping xml")
def _loadBarcodeMappings():
    """
    Load the chemistry mapping table bundled with pbcore and, if the
    SMRT_CHEMISTRY_BUNDLE_DIR environment variable is set to a non-empty
    value, overlay it with the mappings from `chemistry.xml` in that
    directory.

    Returns the mapping object produced by `_loadBarcodeMappingsFromFile`
    for the bundled file, updated in place with any override entries.
    Errors from `_loadBarcodeMappingsFromFile` are not caught here.
    """
    mappingFname = resource_filename(
        Requirement.parse('pbcore'),
        'pbcore/chemistry/resources/mapping.xml')
    mappings = _loadBarcodeMappingsFromFile(mappingFname)
    updMappingDir = os.getenv("SMRT_CHEMISTRY_BUNDLE_DIR")
    if updMappingDir:
        # Imported lazily: only needed when an override directory is given.
        import logging
        from os.path import join
        logging.info(
            "Loading updated chemistry mapping XML from {}".format(updMappingDir))
        # Log and merge the override file exactly once; entries from the
        # override replace bundled entries that share the same key.
        mappings.update(_loadBarcodeMappingsFromFile(
            join(updMappingDir, 'chemistry.xml')))
    return mappings
_BARCODE_MAPPINGS = _loadBarcodeMappings()
def tripleFromMetadataXML(metadataXmlPath):
"""
Scrape the triple from the metadata.xml, or exception if the file
......@@ -50,15 +61,20 @@ def tripleFromMetadataXML(metadataXmlPath):
try:
tree = ET.parse(metadataXmlPath)
root = tree.getroot()
bindingKit = root.find("pb:BindingKit/pb:PartNumber", namespaces=nsd).text
sequencingKit = root.find("pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
bindingKit = root.find(
"pb:BindingKit/pb:PartNumber", namespaces=nsd).text
sequencingKit = root.find(
"pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
# The instrument version is truncated to the first 2 dot delimited components
instrumentControlVersion = root.find("pb:InstCtrlVer", namespaces=nsd).text
instrumentControlVersion = root.find(
"pb:InstCtrlVer", namespaces=nsd).text
verComponents = instrumentControlVersion.split(".")[0:2]
instrumentControlVersion = ".".join(verComponents)
return (bindingKit, sequencingKit, instrumentControlVersion)
except Exception as e:
raise ChemistryLookupError("Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))
raise ChemistryLookupError(
"Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))
def decodeTriple(bindingKit, sequencingKit, softwareVersion):
"""
......
<?xml version="1.0" encoding="utf-8"?>
<MappingTable>
<DefaultSequencingChemistry>XL-C2</DefaultSequencingChemistry>
<Mapping>
<SequencingChemistry>C2</SequencingChemistry>
<BindingKit>001672551</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>1.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>C2</SequencingChemistry>
<BindingKit>001672551</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>C2</SequencingChemistry>
<BindingKit>001672551</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>C2</SequencingChemistry>
<BindingKit>001672551</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-C2</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>1.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-C2</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-C2</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-C2</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-XL</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>1.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-XL</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>2.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-XL</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>XL-XL</SequencingChemistry>
<BindingKit>100150800</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-C2</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>1.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-C2</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-C2</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-C2</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>001558034</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-XL</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>2.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-XL</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P4-XL</SequencingChemistry>
<BindingKit>100236500</BindingKit>
<SequencingKit>100180800</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P5-C3</SequencingChemistry>
<BindingKit>100256000</BindingKit>
<SequencingKit>100254800</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P5-C3</SequencingChemistry>
<BindingKit>100256000</BindingKit>
<SequencingKit>100254800</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100356300</BindingKit>
<SequencingKit>100356200</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100356300</BindingKit>
<SequencingKit>100356200</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100372700</BindingKit>
<SequencingKit>100356200</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100372700</BindingKit>
<SequencingKit>100356200</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100356300</BindingKit>
<SequencingKit>100612400</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100356300</BindingKit>
<SequencingKit>100612400</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100372700</BindingKit>
<SequencingKit>100612400</SequencingKit>
<SoftwareVersion>2.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>P6-C4</SequencingChemistry>
<BindingKit>100372700</BindingKit>
<SequencingKit>100612400</SequencingKit>
<SoftwareVersion>2.3</SoftwareVersion>
</Mapping>
<!-- 3.0 ("Dromedary") chemistry -->
<Mapping>
<SequencingChemistry>S/P1-C1/beta</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-620-000</SequencingKit>
<SoftwareVersion>3.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1/beta</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-620-000</SequencingKit>
<SoftwareVersion>3.1</SoftwareVersion>
</Mapping>
<!-- 3.1 ("Echidna") chemistry -->
<Mapping>
<SequencingChemistry>S/P1-C1.1</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-867-300</SequencingKit>
<SoftwareVersion>3.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.1</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-867-300</SequencingKit>
<SoftwareVersion>3.2</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.1</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-867-300</SequencingKit>
<SoftwareVersion>3.3</SoftwareVersion>
</Mapping>
<!-- 3.1.1 ("Flea") chemistry -->
<Mapping>
<SequencingChemistry>S/P1-C1.2</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-902-100</SequencingKit>
<SoftwareVersion>3.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.2</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-902-100</SequencingKit>
<SoftwareVersion>3.2</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.2</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-902-100</SequencingKit>
<SoftwareVersion>3.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.2</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-902-100</SequencingKit>
<SoftwareVersion>4.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.2</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-902-100</SequencingKit>
<SoftwareVersion>4.1</SoftwareVersion>
</Mapping>
<!-- 3.2 ("Goat") chemistry (a.k.a. "Mimosa") -->
<Mapping>
<SequencingChemistry>S/P1-C1.3</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-972-200</SequencingKit>
<SoftwareVersion>3.2</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.3</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-972-200</SequencingKit>
<SoftwareVersion>3.3</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.3</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-972-200</SequencingKit>
<SoftwareVersion>4.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P1-C1.3</SequencingChemistry>
<BindingKit>100-619-300</BindingKit>
<SequencingKit>100-972-200</SequencingKit>
<SoftwareVersion>4.1</SoftwareVersion>
</Mapping>
<!-- "Seabiscuit" chemistry, accompanied 4.0 SW -->
<Mapping>
<SequencingChemistry>S/P2-C2</SequencingChemistry>
<BindingKit>100-862-200</BindingKit>
<SequencingKit>100-861-800</SequencingKit>
<SoftwareVersion>4.0</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P2-C2</SequencingChemistry>
<BindingKit>100-862-200</BindingKit>
<SequencingKit>100-861-800</SequencingKit>
<SoftwareVersion>4.1</SoftwareVersion>
</Mapping>
<Mapping>
<SequencingChemistry>S/P2-C2</SequencingChemistry>
<BindingKit>100-862-200</BindingKit>
<SequencingKit>101-093-700</SequencingKit>
<SoftwareVersion>4.1</SoftwareVersion>
</Mapping>
<DefaultSequencingChemistry>S/P3-C1/5.0-8M</DefaultSequencingChemistry>
<Mapping>
<SequencingChemistry>S/P2-C2/5.0</SequencingChemistry>
......@@ -449,4 +152,52 @@
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
<!-- // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0EA (4 Rxn) -->
<Mapping>
<SequencingChemistry>S/P4-C2/5.0-8M</SequencingChemistry>
<BindingKit>101-789-500</BindingKit>
<SequencingKit>101-789-300</SequencingKit>
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
<!-- // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0 (4 Rxn) -->
<Mapping>
<SequencingChemistry>S/P4-C2/5.0-8M</SequencingChemistry>
<BindingKit>101-789-500</BindingKit>
<SequencingKit>101-826-100</SequencingKit>
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
<!-- // Sequel® II Binding Kit 2.0; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC -->
<Mapping>
<SequencingChemistry>S/P4-C2/5.0-8M</SequencingChemistry>
<BindingKit>101-789-500</BindingKit>
<SequencingKit>101-820-300</SequencingKit>
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
<!-- // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0EA (4 Rxn) -->
<Mapping>
<SequencingChemistry>S/P4.1-C2/5.0-8M</SequencingChemistry>
<BindingKit>101-820-500</BindingKit>
<SequencingKit>101-789-300</SequencingKit>
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
<!-- // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0 (4 Rxn) -->
<Mapping>
<SequencingChemistry>S/P4.1-C2/5.0-8M</SequencingChemistry>
<BindingKit>101-820-500</BindingKit>
<SequencingKit>101-826-100</SequencingKit>
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
<!-- // Sequel® II Binding Kit 2.1; Sequel® II Sequencing Plate 2.0 (4 Rxn) - QC -->
<Mapping>
<SequencingChemistry>S/P4.1-C2/5.0-8M</SequencingChemistry>
<BindingKit>101-820-500</BindingKit>
<SequencingKit>101-820-300</SequencingKit>
<SoftwareVersion>5.0</SoftwareVersion>
</Mapping>
</MappingTable>
m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5
m110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5
m130522_092457_42208_c100497142550000001823078008081323_s1_p0.1.bax.h5
m130522_092457_42208_c100497142550000001823078008081323_s1_p0.2.bax.h5
m130522_092457_42208_c100497142550000001823078008081323_s1_p0.3.bax.h5
m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.bax.h5
m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.bax.h5
m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.bax.h5
m130727_114215_42211_c100569412550000001823090301191423_s1_p0.1.ccs.h5
m140912_020930_00114_c100702482550000001823141103261590_s1_p0.1.bax.h5
m140912_020930_00114_c100702482550000001823141103261590_s1_p0.2.bax.h5
m140912_020930_00114_c100702482550000001823141103261590_s1_p0.3.bax.h5
# Base name (without extension) of the movie whose files are converted and
# aligned by the rules in this Makefile.
MOVIE1 := m140905_042212_sidney_c100564852550000001823085912221377_s1_X0

# These targets name commands, not files. Declaring them phony keeps a file
# or directory with the same name (e.g. "testdata") from making them look
# up to date.
.PHONY: all testdata upstream_testdata xsdupdate

# Default goal: build the movie-derived files and regenerate all test datasets.
all: $(MOVIE1).subreads.bam $(MOVIE1).aligned_subreads.cmp.h5 $(MOVIE1).aligned_subreads.bam testdata upstream_testdata
# Run bax2bam on the first bax.h5 part, then index the target BAM with
# samtools and with pbindex. The steps are chained so a failure stops the rest.
$(MOVIE1).subreads.bam: $(MOVIE1).1.bax.h5
	bax2bam $< && \
	samtools index $@ && \
	pbindex $@
# Align the subreads BAM against lambdaNEB.fa with pbalign, then index the
# resulting BAM with samtools and with pbindex.
$(MOVIE1).aligned_subreads.bam: $(MOVIE1).subreads.bam lambdaNEB.fa
	pbalign $< lambdaNEB.fa $@ && \
	samtools index $@ && \
	pbindex $@
# Align the first bax.h5 part against lambdaNEB.fa with pbalign-2.3, writing
# the cmp.h5 target with the listed metrics and --forQuiver.
$(MOVIE1).aligned_subreads.cmp.h5: $(MOVIE1).1.bax.h5 lambdaNEB.fa
	pbalign-2.3 --forQuiver \
	    --metrics IPD,DeletionQV,DeletionTag,InsertionQV,MergeQV,SubstitutionQV \
	    $^ $@
# Regenerate the dataset XML files and BAMs under datasets/ from the
# pbalchemysim0 / pbalchemysim1 bas.h5 inputs. The commands run in order and
# later ones consume files written by earlier ones, so do not reorder them.
testdata:
# Reference set for the shared pbalchemysim0 reference FASTA.
dataset.py create --type ReferenceSet --relative datasets/pbalchemysim0.referenceset.xml datasets/pbalchemysim0.reference.fasta
# pbalchemysim0: bas.h5 -> subreads BAM (+ samtools and pbindex indexes) -> SubreadSet.
bax2bam datasets/pbalchemysim0.bas.h5 -o datasets/pbalchemysim0 --pulsefeatures DeletionTag,DeletionQV,InsertionQV,MergeQV,SubstitutionQV && \
samtools index datasets/pbalchemysim0.subreads.bam
pbindex datasets/pbalchemysim0.subreads.bam
dataset.py create --type SubreadSet --relative datasets/pbalchemysim0.subreadset.xml datasets/pbalchemysim0.subreads.bam
# pbalchemysim0: align to the reference and wrap the result in an AlignmentSet.
pbalign datasets/pbalchemysim0.subreads.bam datasets/pbalchemysim0.reference.fasta datasets/pbalchemysim0.pbalign.bam && \
dataset.py create --type AlignmentSet --relative datasets/pbalchemysim0.alignmentset.xml datasets/pbalchemysim0.pbalign.bam
# Split that AlignmentSet by contig into 2 chunks, then re-create each chunk
# XML from itself with --relative (input and output are the same file).
dataset.py split --contigs --chunks 2 datasets/pbalchemysim0.alignmentset.xml
dataset.py create --type AlignmentSet --relative datasets/pbalchemysim0.alignmentset.chunk0contigs.xml datasets/pbalchemysim0.alignmentset.chunk0contigs.xml
dataset.py create --type AlignmentSet --relative datasets/pbalchemysim0.alignmentset.chunk1contigs.xml datasets/pbalchemysim0.alignmentset.chunk1contigs.xml
# pbalchemysim1: same conversion/indexing/alignment steps, aligned against the
# pbalchemysim0 reference FASTA.
bax2bam datasets/pbalchemysim1.bas.h5 -o datasets/pbalchemysim1 --pulsefeatures DeletionTag,DeletionQV,InsertionQV,MergeQV,SubstitutionQV && \
samtools index datasets/pbalchemysim1.subreads.bam
pbindex datasets/pbalchemysim1.subreads.bam
dataset.py create --type SubreadSet --relative datasets/pbalchemysim1.subreadset.xml datasets/pbalchemysim1.subreads.bam
pbalign datasets/pbalchemysim1.subreads.bam datasets/pbalchemysim0.reference.fasta datasets/pbalchemysim1.pbalign.bam && \
dataset.py create --type AlignmentSet --relative datasets/pbalchemysim1.alignmentset.xml datasets/pbalchemysim1.pbalign.bam
# Combined AlignmentSet built from the two per-sample AlignmentSet XMLs.
dataset.py create --type AlignmentSet --relative datasets/pbalchemysim.alignmentset.xml datasets/pbalchemysim0.alignmentset.xml datasets/pbalchemysim1.alignmentset.xml
# SubreadSet with stats: create it from the pbalchemysim0 SubreadSet, load two
# sts.xml files into it, then re-create it from itself with --relative.
dataset.py create --type SubreadSet --relative datasets/subreadSetWithStats.xml datasets/pbalchemysim0.subreadset.xml
dataset.py loadstats datasets/subreadSetWithStats.xml datasets/m150430_142051_Mon_p1_b25.sts.xml
dataset.py loadstats datasets/subreadSetWithStats.xml datasets/m150616_053259_ethan_c100710482550000001823136404221563_s1_p0.sts.xml
dataset.py create --type SubreadSet --relative datasets/subreadSetWithStats.xml datasets/subreadSetWithStats.xml
# HdfSubreadSets: one per bas.h5 input and one combining both.
dataset.py create --type HdfSubreadSet --relative datasets/pbalchemysim0.hdfsubreadset.xml datasets/pbalchemysim0.bas.h5
dataset.py create --type HdfSubreadSet --relative datasets/pbalchemysim1.hdfsubreadset.xml datasets/pbalchemysim1.bas.h5
dataset.py create --type HdfSubreadSet --relative datasets/pbalchemysim.hdfsubreadset.xml datasets/pbalchemysim0.bas.h5 datasets/pbalchemysim1.bas.h5
# Create the lambda ReferenceSet and AlignmentSet XMLs under datasets/.
# The recipe reads lambdaNEB.fa and the aligned subreads BAM, so both are
# declared as prerequisites: running this target on its own, or under
# `make -j`, then no longer depends on `all` having built the BAM first.
upstream_testdata: lambdaNEB.fa $(MOVIE1).aligned_subreads.bam
	dataset.py create --type ReferenceSet --relative datasets/lambda.referenceset.xml lambdaNEB.fa
	dataset.py create --type AlignmentSet --relative datasets/lambda.alignmentset.xml $(MOVIE1).aligned_subreads.bam
# Copy every example dataset XML from the doc/FileFormats tree (five levels
# up from this directory) into datasets/.
xsdupdate: example_xml_dir := ../../../../../doc/FileFormats/examples/datasets
xsdupdate:
	cp $(example_xml_dir)/*.xml datasets/.