Commits on Source (3)
Copyright (c) 2016, Pacific Biosciences of California, Inc.
Copyright (c) 2016-2018, Pacific Biosciences of California, Inc.
All rights reserved.
......
include LICENSES.txt
include LICENSE
pbalign maps PacBio reads to reference sequences.
<h1 align="center"><img src="http://www.pacb.com/wp-content/themes/pacific-biosciences/img/pacific-biosciences-logo-mobile.svg"/></h1>
<h1 align="center">pbalign</h1>
<p align="center">A python wrapper to easily align PacBio reads to reference sequences</p>
Want to know how to install and run pbalign?
***
## Availability
The latest version can be installed via the bioconda package `pbalign`.
Please refer to our [official pbbioconda page](https://github.com/PacificBiosciences/pbbioconda)
for information on Installation, Support, License, Copyright, and Disclaimer.
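As a quick sanity check after installing from bioconda, a minimal sketch, assuming the `get_version()` helper shown in a later hunk is exposed at the package level:

```python
# Illustrative post-install check: assumes `pbalign` was installed into the
# active environment and that get_version() is exported by the package.
import pbalign

print(pbalign.get_version())
```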
## How To?
Want to know how to manually install or run pbalign?
Please refer to https://github.com/PacificBiosciences/pbalign/blob/master/doc/howto.rst
......
pbalign
=======
Some good soul needs to add a Python3 package.
pbalign (0.3.2-1) unstable; urgency=medium
* Team upload
* Standards-Version: 4.2.1
* d/rules: clean up all build remnants
* TODO: Python3-version, added README.Debian as a reminder
-- Steffen Moeller <moeller@debian.org> Sat, 26 Jan 2019 13:20:46 +0100
pbalign (0.3.1-1) unstable; urgency=medium
* Team upload.
......
......@@ -12,7 +12,7 @@ Build-Depends: debhelper (>= 11~),
python-pbcommand,
python3-sphinx,
python-nose
Standards-Version: 4.2.1
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/pbalign
Vcs-Git: https://salsa.debian.org/med-team/pbalign.git
Homepage: https://github.com/PacificBiosciences/pbalign
......
......@@ -58,3 +58,10 @@ no_override_dh_installmanpages:
$(HELP2MAN) --name="$${description[`basename $$executable`]}" \
$$executable > $(MANDIR)/`basename $$executable.1`; \
done
override_dh_auto_clean:
dh_auto_clean
rm -rf doc/_build/
rm -f doc/pbalign.alignservice.rst doc/pbalign.rst doc/pbalign.tasks.rst doc/pbalign.tools.rst doc/pbalign.utils.rst
rm -rf pbalign.egg-info
......@@ -57,7 +57,7 @@ def get_changelist():
return _get_changelist(_changelist)
VERSION = (0, 3, 1)
VERSION = (0, 4, 1)
def get_version():
......
......@@ -37,7 +37,7 @@
# Author: Yuan Li
from __future__ import absolute_import
from __future__ import absolute_import, division, print_function
import logging
from pbalign.service import Service
from pbalign.utils.progutil import Execute
......@@ -92,12 +92,13 @@ class BamPostService(Service):
except Exception:
pass
_stvmajor = int(_samtoolsversion[0])
sort_nproc = max(1, nproc//4)
if _stvmajor >= 1:
cmd = 'samtools sort --threads {t} -m 4G -o {sortedBamFile} {unsortedBamFile}'.format(
t=nproc, sortedBamFile=sortedBamFile, unsortedBamFile=unsortedBamFile)
cmd = 'samtools sort --threads {t} -m 768M -o {sortedBamFile} {unsortedBamFile}'.format(
t=sort_nproc, sortedBamFile=sortedBamFile, unsortedBamFile=unsortedBamFile)
else:
cmd = 'samtools sort --threads {t} -m 4G {unsortedBamFile} {prefix}'.format(
t=nproc, unsortedBamFile=unsortedBamFile, prefix=sortedPrefix)
cmd = 'samtools sort --threads {t} -m 768M {unsortedBamFile} {prefix}'.format(
t=sort_nproc, unsortedBamFile=unsortedBamFile, prefix=sortedPrefix)
Execute(self.name, cmd)
def _makebai(self, sortedBamFile, outBaiFile):
......
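For context on the `samtools sort` change above: `-m` is a per-thread memory cap, so the old command could request roughly nproc × 4G during sorting. A small illustrative sketch of the new sizing, using a hypothetical nproc of 16 (the value and file names are placeholders, not from the diff):

```python
# Illustrative only: reproduces the thread/memory sizing from the hunk above
# for a hypothetical worker count; the BAM paths are placeholders.
nproc = 16                        # hypothetical total worker count
sort_nproc = max(1, nproc // 4)   # threads handed to `samtools sort` -> 4
cmd = 'samtools sort --threads {t} -m 768M -o {sortedBamFile} {unsortedBamFile}'.format(
    t=sort_nproc, sortedBamFile='out.sorted.bam', unsortedBamFile='out.bam')
print(cmd)
# old sizing: 16 threads * 4G   ~= 64G peak sort memory
# new sizing:  4 threads * 768M ~=  3G peak sort memory
```

Because the per-thread cap and the thread count are reduced together, the worst-case sort memory drops from tens of gigabytes to a few gigabytes on a typical node.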
# FIXME this should probably live somewhere more general, e.g. pbdataset?
"""
Consolidate AlignmentSet .bam files
"""
import functools
import tempfile
import logging
import os.path as op
import os
import sys
from pbcommand.models import get_pbparser, FileTypes, ResourceTypes, DataStore, DataStoreFile
from pbcommand.cli import pbparser_runner
from pbcommand.utils import setup_log
from pbcore.io import openDataSet
class Constants(object):
TOOL_ID = "pbalign.tasks.consolidate_alignments"
VERSION = "0.2.0"
DRIVER = "python -m pbalign.tasks.consolidate_alignments --resolved-tool-contract "
CONSOLIDATE_ID = "pbalign.task_options.consolidate_aligned_bam"
N_FILES_ID = "pbalign.task_options.consolidate_n_files"
BAI_FILE_TYPES = {
FileTypes.BAMBAI.file_type_id,
FileTypes.I_BAI.file_type_id
}
def get_parser(tool_id=Constants.TOOL_ID,
file_type=FileTypes.DS_ALIGN,
driver_exe=Constants.DRIVER,
version=Constants.VERSION,
description=__doc__):
ds_type = file_type.file_type_id.split(".")[-1]
p = get_pbparser(tool_id,
version,
"{t} consolidate".format(t=ds_type),
description,
driver_exe,
is_distributed=True,
resource_types=(ResourceTypes.TMP_DIR,))
p.add_input_file_type(file_type,
"align_in",
"Input {t}".format(t=ds_type),
"Gathered {t} to consolidate".format(t=ds_type))
p.add_output_file_type(file_type,
"ds_out",
"Alignments",
description="Alignment results dataset",
default_name="combined")
p.add_output_file_type(FileTypes.DATASTORE,
"datastore",
"JSON Datastore",
description="Datastore containing BAM resource",
default_name="resources")
p.add_boolean(Constants.CONSOLIDATE_ID, "consolidate",
default=False,
name="Consolidate .bam",
description="Merge chunked/gathered .bam files")
p.add_int(Constants.N_FILES_ID, "consolidate_n_files",
default=1,
name="Number of .bam files",
description="Number of .bam files to create in consolidate mode")
return p
def run_consolidate(dataset_file, output_file, datastore_file,
consolidate, n_files, task_id=Constants.TOOL_ID):
datastore_files = []
with openDataSet(dataset_file) as ds_in:
if consolidate:
if len(ds_in.toExternalFiles()) != 1:
new_resource_file = op.splitext(output_file)[0] + ".bam"
ds_in.consolidate(new_resource_file, numFiles=n_files)
# always display the BAM/BAI if consolidation is enabled
# XXX there is no uniqueness constraint on the sourceId, but this
# seems sloppy nonetheless - unfortunately I don't know how else to
# make view rule whitelisting work
for ext_res in ds_in.externalResources:
if ext_res.resourceId.endswith(".bam"):
ds_file = DataStoreFile(
ext_res.uniqueId,
task_id + "-out-2",
ext_res.metaType,
ext_res.bam)
datastore_files.append(ds_file)
for index in ext_res.indices:
if index.metaType in Constants.BAI_FILE_TYPES:
ds_file = DataStoreFile(
index.uniqueId,
task_id + "-out-3",
index.metaType,
index.resourceId)
datastore_files.append(ds_file)
ds_in.newUuid()
ds_in.write(output_file)
datastore = DataStore(datastore_files)
datastore.write_json(datastore_file)
return 0
def args_runner(args, task_id=Constants.TOOL_ID):
return run_consolidate(
dataset_file=args.align_in,
output_file=args.ds_out,
datastore_file=args.datastore,
consolidate=args.consolidate,
n_files=args.consolidate_n_files,
task_id=task_id)
def rtc_runner(rtc, task_id=Constants.TOOL_ID):
tempfile.tempdir = rtc.task.tmpdir_resources[0].path
return run_consolidate(
dataset_file=rtc.task.input_files[0],
output_file=rtc.task.output_files[0],
datastore_file=rtc.task.output_files[1],
consolidate=rtc.task.options[Constants.CONSOLIDATE_ID],
n_files=rtc.task.options[Constants.N_FILES_ID],
task_id=task_id)
def main(argv=sys.argv):
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger()
return pbparser_runner(argv[1:],
get_parser(),
args_runner,
rtc_runner,
log,
setup_log)
if __name__ == '__main__':
sys.exit(main())
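For reference, a minimal programmatic call into the task above; the dataset and output paths are placeholders and would need to point at a real gathered AlignmentSet:

```python
# Hypothetical invocation of run_consolidate(); all file names are placeholders.
from pbalign.tasks.consolidate_alignments import run_consolidate

exit_code = run_consolidate(
    dataset_file="mapped.alignmentset.xml",    # gathered AlignmentSet to consolidate
    output_file="combined.alignmentset.xml",   # consolidated dataset written here
    datastore_file="resources.datastore.json", # datastore listing the BAM/BAI resources
    consolidate=True,                          # merge the chunked .bam files
    n_files=1)                                 # emit a single consolidated .bam
```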
"""
Consolidate ConsensusAlignmentSet .bam files
"""
import logging
import sys
from pbcommand.models import FileTypes
from pbcommand.cli import pbparser_runner
from pbcommand.utils import setup_log
from pbalign.tasks.consolidate_alignments import Constants as BaseConstants, get_parser, args_runner, rtc_runner
class Constants(BaseConstants):
TOOL_ID = "pbalign.tasks.consolidate_alignments_ccs"
VERSION = "0.1.0"
DRIVER = "python -m pbalign.tasks.consolidate_alignments_ccs --resolved-tool-contract "
def main(argv=sys.argv):
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger()
p = get_parser(
tool_id=Constants.TOOL_ID,
file_type=FileTypes.DS_ALIGN_CCS,
driver_exe=Constants.DRIVER,
version=Constants.VERSION,
description=__doc__)
return pbparser_runner(argv[1:],
p,
lambda args: args_runner(args, Constants.TOOL_ID),
lambda rtc: rtc_runner(rtc, Constants.TOOL_ID),
log,
setup_log)
if __name__ == '__main__':
sys.exit(main())
......@@ -111,7 +111,7 @@ class ExtractRunner(PBToolRunner):
def run(self):
"""Executes the body of the script."""
logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
logging.info("Running {f} v{v}.".format(f="extractUnmappedSubreads.py",
v=self.getVersion()))
args = self.args
logging.info("Extracting unmapped reads from a fasta file.")
......
......@@ -13,7 +13,7 @@ from pbcore.io import CmpH5Reader, EmptyCmpH5Error
import traceback
from pbalign.utils.RgnH5IO import RgnH5Reader, RgnH5Writer
__VERSION__ = "0.3"
__VERSION__ = "0.3.2"
class AlignedReadsMasker(object):
......@@ -187,7 +187,7 @@ def main():
rcode = run(args.inCmpFile, args.inRgnFofn, args.outRgnFofn)
logging.info("Exiting {f} {v} with rturn code {r}.".format(
r=rcode, f=os.path.basename(__file__), v=__VERSION__))
r=rcode, f="mask_aligned_reads.py", v=__VERSION__))
return rcode
if __name__ == "__main__":
......
......@@ -61,10 +61,10 @@ def Execute(name, cmd):
errMsg : the error message
"""
logging.info(name + ": Call \"{0}\"".format(cmd))
output, errCode, errMsg = backticks(cmd, merge_stderr=True)
output, errCode, errMsg = backticks(cmd, merge_stderr=False)
if errCode != 0:
errMsg = "%s returned a non-zero exit status %s. CMD: '%s'\nERROR: %s\nOutput:%r" % \
(name, cmd, errCode, errMsg, output)
errMsg = "%s returned a non-zero exit status %d\nCMD: '%s'\nERROR: %s\nOutput:%r" % \
(name, errCode, cmd, errMsg, output)
logging.error(errMsg)
raise RuntimeError(errMsg)
return output, errCode, errMsg
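The new behaviour above (no longer merging stderr into the captured output, raising on a non-zero exit, otherwise returning the captured output) can be exercised with a small sketch; the command string here is arbitrary and only for illustration:

```python
# Illustrative use of Execute(); the name and command are arbitrary examples.
from pbalign.utils.progutil import Execute

try:
    output, errCode, errMsg = Execute("samtools_version", "samtools --version")
except RuntimeError as e:
    # Raised when the command exits non-zero; the message now reports the
    # exit status, command, stderr and captured output in a consistent order.
    print("command failed: {e}".format(e=e))
```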
......@@ -6,7 +6,6 @@ set -vex
#########
${PIP} install --user \
$NX3PBASEURL/pythonpkgs/pysam-0.13-cp27-cp27mu-linux_x86_64.whl \
$NX3PBASEURL/pythonpkgs/xmlbuilder-1.0-cp27-none-any.whl \
$NX3PBASEURL/pythonpkgs/avro-1.7.7-cp27-none-any.whl \
iso8601 \
......@@ -14,6 +13,7 @@ ${PIP} install --user \
cram \
nose
${PIP} install --user pysam==0.15.1
${PIP} install --user pylint
${PIP} install --user -e repos/PacBioTestData
${PIP} install --user -e repos/pbcore
......
from setuptools import setup, Extension, find_packages
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
from setuptools import setup, find_packages
setup(
name='pbalign',
version='0.3.1',
version='0.3.2',
author='Pacific Biosciences',
author_email='devnet@pacificbiosciences.com',
license='LICENSE.txt',
license='BSD-3-Clause-Clear',
packages=find_packages(),
zip_safe=False,
install_requires=[
'pbcore >= 0.8.5',
'pbcommand >= 0.2.0',
'pysam',
'pysam >= 0.15.1',
],
tests_require=[
'nose',
......
......@@ -59,55 +59,5 @@ class TestPbalignMinorVariants(pbcommand.testkit.PbTestApp):
type(ds_out).__name__)
HAVE_PBMERGE = False
try:
with tempfile.TemporaryFile() as O, \
tempfile.TemporaryFile() as E:
assert subprocess.call(["pbmerge", "--help"], stdout=O, stderr=E) == 0
except Exception as e:
sys.stderr.write(str(e)+"\n")
sys.stderr.write("pbmerge missing, skipping test\n")
else:
HAVE_PBMERGE = True
@unittest.skipUnless(HAVE_PBMERGE, "pbmerge not installed")
class TestConsolidateBam(pbcommand.testkit.PbTestApp):
DRIVER_BASE = "python -m pbalign.tasks.consolidate_alignments"
INPUT_FILES = [pbtestdata.get_file("aligned-ds-2")]
TASK_OPTIONS = {
"pbalign.task_options.consolidate_aligned_bam": True,
}
def run_after(self, rtc, output_dir):
with openDataSet(rtc.task.output_files[0]) as f:
f.assertIndexed()
self.assertEqual(len(f.toExternalFiles()), 1)
# test for bug 33778
qnames = set()
for rec in f:
qnames.add(rec.qName)
self.assertEqual(len(qnames), len(f))
ds = DataStore.load_from_json(rtc.task.output_files[1])
self.assertEqual(len(ds.files), 2)
@unittest.skipUnless(HAVE_PBMERGE, "pbmerge not installed")
class TestConsolidateBamDisabled(TestConsolidateBam):
TASK_OPTIONS = {
"pbalign.task_options.consolidate_aligned_bam": False,
}
def run_after(self, rtc, output_dir):
with AlignmentSet(rtc.task.output_files[0]) as f:
self.assertEqual(len(f.toExternalFiles()), 2)
@unittest.skipUnless(HAVE_PBMERGE, "pbmerge not installed")
class TestConsolidateBamCCS(TestConsolidateBam):
DRIVER_BASE = "python -m pbalign.tasks.consolidate_alignments_ccs"
INPUT_FILES = [pbtestdata.get_file("rsii-ccs-aligned")]
if __name__ == "__main__":
unittest.main()