Skip to content
Commits on Source (9)
......@@ -22,16 +22,22 @@ class ResultCollector(object):
def _run(self):
self.onStart()
sentinelsReceived = 0
while sentinelsReceived < options.numWorkers:
result = self._resultsQueue.get()
if result is None:
sentinelsReceived += 1
else:
self.onResult(result)
self.onFinish()
try:
sentinelsReceived = 0
while sentinelsReceived < options.numWorkers:
result = self._resultsQueue.get()
if result is None:
sentinelsReceived += 1
else:
self.onResult(result)
finally:
# Even on error, we want the files to be closed.
# Otherwise, e.g., we could end up with empty .gz files,
# which are invalid.
# And for debugging, we should see the current state of output.
# This will run even on KeyboardInterrupt.
self.onFinish()
logging.info("Analysis completed.")
def run(self):
if options.doProfiling:
......@@ -80,7 +86,6 @@ class ResultCollector(object):
self._flushContigIfCompleted(window)
def onFinish(self):
logging.info("Analysis completed.")
if self.fastaWriter: self.fastaWriter.close()
if self.fastqWriter: self.fastqWriter.close()
if self.gffWriter: self.gffWriter.close()
......
# Author: David Alexander, David Seifert
from __future__ import absolute_import, division, print_function
__VERSION__ = '2.3.2' # don't forget to update setup.py and doc/conf.py too
__VERSION__ = '2.3.3' # don't forget to update setup.py and doc/conf.py too
......@@ -11,7 +11,7 @@ from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread
from GenomicConsensus.consensus import Consensus, ArrowConsensus, join
from GenomicConsensus.windows import kSpannedIntervals, holes, subWindow
from GenomicConsensus.variants import filterVariants, annotateVariants
from GenomicConsensus.variants import annotateVariants
from GenomicConsensus.arrow import diploid
from GenomicConsensus.utils import die
......@@ -99,14 +99,11 @@ def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
options.aligner, ai=None,
diploid=arrowConfig.polishDiploid)
filteredVars = filterVariants(options.minCoverage,
options.minConfidence,
variants_)
# Annotate?
if options.annotateGFF:
annotateVariants(filteredVars, clippedAlns)
annotateVariants(variants_, clippedAlns)
variants += filteredVars
variants += variants_
# The nascent consensus sequence might contain ambiguous bases, these
# need to be removed as software in the wild cannot deal with such
......
......@@ -34,7 +34,7 @@ class ArrowConfig(object):
computeConfidence=True,
readStumpinessThreshold=0.1,
minReadScore=0.75,
minHqRegionSnr=3.75,
minHqRegionSnr=2.5,
minZScore=-3.5,
minAccuracy=0.82,
maskRadius=0,
......
......@@ -47,6 +47,9 @@ class VariantsGffWriter(object):
def __init__(self, f, optionsDict, referenceEntries):
self._gffWriter = GffWriter(f)
self._minConfidence = optionsDict["minConfidence"]
self._minCoverage = optionsDict["minCoverage"]
self._gffWriter.writeHeader("##pacbio-variant-version 2.1")
self._gffWriter.writeHeader("##date %s" % time.ctime())
self._gffWriter.writeHeader("##feature-ontology %s" % self.ONTOLOGY_URL)
......@@ -61,7 +64,8 @@ class VariantsGffWriter(object):
def writeVariants(self, variants):
for var in variants:
self._gffWriter.writeRecord(toGffRecord(var))
if var.coverage >= self._minCoverage and var.confidence >= self._minConfidence:
self._gffWriter.writeRecord(toGffRecord(var))
def close(self):
self._gffWriter.close()
......@@ -21,8 +21,11 @@ class VariantsVcfWriter(object):
def __init__(self, f, optionsDict, referenceEntries):
self._vcfFile = open(f, "w")
self._minConfidence = optionsDict["minConfidence"]
self._minCoverage = optionsDict["minCoverage"]
print(dedent('''\
##fileformat=VCFv4.3
##fileformat=VCFv4.2
##fileDate={date}
##source=GenomicConsensusV{version}
##reference={reference}''').format(
......@@ -37,6 +40,26 @@ class VariantsVcfWriter(object):
length=entry.length
# TODO(lhepler): evaluate adding md5 hexdigest here on large genomes
), file=self._vcfFile)
print('##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">',
file=self._vcfFile)
if optionsDict["diploid"]:
print('##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">',
file=self._vcfFile)
# filters
self._minConfidenceFilterID = 'q{}'.format(self._minConfidence)
if self._minConfidence > 0:
print('##FILTER=<ID={id},Description="Quality below {confidence}">'.format(
id=self._minConfidenceFilterID,
confidence=self._minConfidence
), file=self._vcfFile)
self._minCoverageFilterID = 'c{}'.format(self._minCoverage)
if self._minCoverage > 0:
print('##FILTER=<ID={id},Description="Coverage below {coverage}">'.format(
id=self._minCoverageFilterID,
coverage=self._minCoverage
), file=self._vcfFile)
print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=self._vcfFile)
def writeVariants(self, variants):
......@@ -79,6 +102,15 @@ class VariantsVcfWriter(object):
info = "DP={0}".format(var.coverage)
if freq:
info = info + ";" + freq
# failed filters
failedFilters = []
if var.confidence < self._minConfidence:
failedFilters.append(self._minConfidenceFilterID)
if var.coverage < self._minCoverage:
failedFilters.append(self._minCoverageFilterID)
filterText = ";".join(failedFilters) if failedFilters else "PASS"
print("{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}".format(
chrom=reference.idToFullName(var.refId),
pos=pos,
......@@ -86,7 +118,7 @@ class VariantsVcfWriter(object):
ref=ref,
alt=alt,
qual=var.confidence,
filter="PASS",
filter=filterText,
info=info), file=self._vcfFile)
def close(self):
......
......@@ -59,7 +59,7 @@ class Constants(object):
DEFAULT_MAX_COVERAGE = 100
DEFAULT_MIN_MAPQV = 10
DEFAULT_MIN_READSCORE = 0.65
DEFAULT_MIN_HQREGIONSNR = 3.75
DEFAULT_MIN_HQREGIONSNR = 2.5
DEFAULT_MIN_ZSCORE = -3.5
DEFAULT_MIN_ACCURACY = 0.82
DEFAULT_MASK_RADIUS = 3
......
......@@ -11,7 +11,7 @@ from .. import reference
from ..options import options
from ..consensus import Consensus, join
from ..windows import kSpannedIntervals, holes, subWindow
from ..variants import Variant, filterVariants, annotateVariants
from ..variants import Variant, annotateVariants
from ..Worker import WorkerProcess, WorkerThread
from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread
from ..arrow.utils import variantsFromAlignment
......@@ -32,7 +32,7 @@ class PoaConfig(object):
noEvidenceConsensus="nocall",
readStumpinessThreshold=0.1,
minReadScore=0.75,
minHqRegionSnr=3.75):
minHqRegionSnr=2.5):
self.aligner = aligner
self.minMapQV = minMapQV
self.minPoaCoverage = minPoaCoverage
......@@ -191,14 +191,11 @@ def poaConsensusAndVariants(alnFile, refWindow, referenceContig,
consensusAndVariantsForAlignments(subWin, intRefSeq,
clippedAlns, poaConfig)
filteredVars = filterVariants(options.minCoverage,
options.minConfidence,
variants_)
# Annotate?
if options.annotateGFF:
annotateVariants(filteredVars, clippedAlns)
annotateVariants(variants_, clippedAlns)
variants += filteredVars
variants += variants_
else:
css = Consensus.noCallConsensus(poaConfig.noEvidenceConsensus,
subWin, intRefSeq)
......
......@@ -11,7 +11,7 @@ from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread
from GenomicConsensus.consensus import Consensus, QuiverConsensus, join
from GenomicConsensus.windows import kSpannedIntervals, holes, subWindow
from GenomicConsensus.variants import filterVariants, annotateVariants
from GenomicConsensus.variants import annotateVariants
from GenomicConsensus.quiver import diploid
import GenomicConsensus.quiver.model as M
......@@ -99,14 +99,11 @@ def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
options.aligner,
mms=None)
filteredVars = filterVariants(options.minCoverage,
options.minConfidence,
variants_)
# Annotate?
if options.annotateGFF:
annotateVariants(filteredVars, clippedAlns)
annotateVariants(variants_, clippedAlns)
variants += filteredVars
variants += variants_
else:
css = QuiverConsensus.noCallConsensus(quiverConfig.noEvidenceConsensus,
subWin, intRefSeq)
......
......@@ -88,11 +88,6 @@ class Variant(CommonEqualityMixin):
self.annotations.append((key, value))
def filterVariants(minCoverage, minConfidence, variants):
return [ v for v in variants
if ((v.coverage >= minCoverage) and
(v.confidence >= minConfidence)) ]
def annotateVariants(variants, alns):
# Operates in place
for v in variants:
......
include LICENSES
include LICENSE
# Quiver model definitions
graft GenomicConsensus/quiver/resources
GenomicConsensus (quiver, arrow) [![Circle CI](https://circleci.com/gh/PacificBiosciences/GenomicConsensus.svg?style=svg)](https://circleci.com/gh/PacificBiosciences/GenomicConsensus)
-------------------------
<h1 align="center"><img src="http://www.pacb.com/wp-content/themes/pacific-biosciences/img/pacific-biosciences-logo-mobile.svg"/></h1>
<h1 align="center">GenomicConsensus</h1>
<p align="center">Genome polishing and variant calling.</p>
***
The ``GenomicConsensus`` package provides the ``variantCaller`` tool,
which allows you to apply the Quiver or Arrow algorithm to mapped
PacBio reads to get consensus and variant calls.
Background on Quiver and Arrow
------------------------------
## Availability
Latest version can be installed via bioconda package `genomicconsensus`.
Please refer to our [official pbbioconda page](https://github.com/PacificBiosciences/pbbioconda)
for information on Installation, Support, License, Copyright, and Disclaimer.
## Background on Quiver and Arrow
*Quiver* is the legacy consensus model based on a conditional random
field approach. Quiver enables consensus accuracies on genome
......@@ -24,13 +32,7 @@ Quiver is supported for PacBio RS data. Arrow is supported for PacBio
Sequel data and RS data with the P6-C4 chemistry.
Getting GenomicConsensus
------------------------
Casual users should get ``GenomicConsensus`` from the
[SMRTanalysis software bundle](http://www.pacb.com/support/software-downloads/).
Running
## Running
-------
Basic usage is as follows:
......@@ -56,8 +58,7 @@ FASTQ file.
release of SMRTanalysis 3.0 or build from GitHub sources.*
More documentation
------------------
## More documentation
- [More detailed installation and running instructions](./doc/HowTo.rst)
- [FAQ](./doc/FAQ.rst)
......
#!/bin/bash
#!/usr/bin/env bash
set -vex
bash -vex scripts/ci/build.sh $@
################
# DEPENDENCIES #
################
## Load modules
type module >& /dev/null || . /mnt/software/Modules/current/init/bash
module purge
# python deps
module load python/2
module load cram
module load swig
# One fairly fast cram-test,
# quiver-tinyLambda-coverage-islands.t,
# was moved from cram/internal. It needs some GNU modules.
# If that becomes a problem, just move it back to cram/internal.
module load mummer
module load blasr/2.3.0
module load exonerate/2.0.0
module load gfftools/dalexander
case "${bamboo_planRepository_branchName}" in
master)
module load ConsensusCore/master
module load unanimity/master
;;
*)
module load ConsensusCore/develop
module load unanimity/develop
;;
esac
## Use PYTHONUSERBASE in lieu of virtualenv
export PYTHONUSERBASE="${PWD}/build"
export PATH="${PYTHONUSERBASE}/bin:${PATH}"
source scripts/ci/setup.sh
source scripts/ci/build.sh
source scripts/ci/test.sh
......@@ -57,7 +57,20 @@ class CoverageBedRecord(BedRecord):
bed.chromStart = gff.start - 1
bed.chromEnd = gff.end
bed.name = 'meanCov'
bed.score = float(gff.cov2.split(',')[0])
# 'coverage' field output by variantCaller nowadays
score = getattr(gff, 'coverage', None)
if score is None:
# backwards compatible field from quiver days
score = getattr(gff, 'cov2', None)
if score is None:
logging.error('Could not determine coverage from input GFF')
sys.exit(-1)
else:
bed.score = float(score.split(',')[0])
else:
bed.score = float(score)
bed.strand = gff.strand
return bed
......
pbgenomicconsensus (2.3.3-1) UNRELEASED; urgency=medium
* Use 2to3 to port from Python2 to Python3
Closes: #937255
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.1
TODO: unanimity to get python3-consensuscore2
-- Andreas Tille <tille@debian.org> Wed, 18 Dec 2019 16:36:56 +0100
pbgenomicconsensus (2.3.2-5) unstable; urgency=medium
* Team upload.
......
......@@ -3,26 +3,26 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
dh-python,
pandoc,
python-pkg-resources,
python-all,
python-setuptools,
python-pbconsensuscore,
python-pbcore (>= 1.2.9),
python-pbcommand (>= 0.3.20),
python-h5py,
python-numpy,
python3-pkg-resources,
python3-all,
python3-setuptools,
python3-pbconsensuscore,
python3-pbcore,
python3-pbcommand,
python3-h5py,
python3-numpy,
# Test-Depends:
# pbtestdata, (#832311)
python-nose,
python-cram,
python-pytest,
python-consensuscore2,
python3-nose <!nocheck>,
python3-cram <!nocheck>,
python3-pytest <!nocheck>,
python3-consensuscore2 <!nocheck>,
mummer,
exonerate
Standards-Version: 4.3.0
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/pbgenomicconsensus
Vcs-Git: https://salsa.debian.org/med-team/pbgenomicconsensus.git
Homepage: https://github.com/PacificBiosciences/GenomicConsensus
......@@ -30,10 +30,10 @@ Homepage: https://github.com/PacificBiosciences/GenomicConsensus
Package: pbgenomicconsensus
Architecture: all
Depends: ${misc:Depends},
${python:Depends},
python-pkg-resources,
python-pbgenomicconsensus (= ${source:Version})
Recommends: python-consensuscore2
${python3:Depends},
python3-pkg-resources,
python3-pbgenomicconsensus (= ${source:Version})
Recommends: python3-consensuscore2
Description: Pacific Biosciences variant and consensus caller
The GenomicConsensus package provides Quiver, Pacific Biosciences'
flagship consensus and variant caller. Quiver is an algorithm that finds
......@@ -45,16 +45,16 @@ Description: Pacific Biosciences variant and consensus caller
.
This package is part of the SMRTAnalysis suite
Package: python-pbgenomicconsensus
Package: python3-pbgenomicconsensus
Architecture: all
Section: python
Depends: ${misc:Depends},
${python:Depends},
python-pbconsensuscore,
python-h5py,
python-numpy
Suggests: python-consensuscore2
Description: Pacific Biosciences variant and consensus caller (Python 2)
${python3:Depends},
python3-pbconsensuscore,
python3-h5py,
python3-numpy
Suggests: python3-consensuscore2
Description: Pacific Biosciences variant and consensus caller (Python3)
The GenomicConsensus package provides Quiver, Pacific Biosciences'
flagship consensus and variant caller. Quiver is an algorithm that finds
the maximum likelihood template sequence given PacBio reads of the template.
......@@ -63,5 +63,5 @@ Description: Pacific Biosciences variant and consensus caller (Python 2)
the base sequence of each read, Quiver uses several additional quality value
covariates that the base caller provides.
.
This package is part of the SMRTAnalysis suite and provides the Python 2
This package is part of the SMRTAnalysis suite and provides the Python2
backend library.
This diff is collapsed.
......@@ -6,3 +6,4 @@ ignore_test_requiring_pbtestdata.patch
ignore_test_using_local_data.patch
ignore_warnings.patch
use_frombuffer.patch
2to3.patch