Skip to content
Commits on Source (18)
# Author: David Alexander, Jim Drake
from __future__ import absolute_import, division, print_function
import cProfile, logging, os.path, sys
from multiprocessing import Process
......
# Author: David Alexander, Jim Drake
from __future__ import absolute_import, division, print_function
import cProfile, logging, os.path
from multiprocessing import Process
......
# Author: David Alexander, David Seifert
from __future__ import absolute_import, division, print_function
__VERSION__ = '2.2.2' # don't forget to update setup.py and doc/conf.py too
__VERSION__ = '2.3.2' # don't forget to update setup.py and doc/conf.py too
# Author: David Alexander
from __future__ import absolute_import, division, print_function
from .utils import die
......
# Authors: David Alexander, Lance Hepler
from __future__ import absolute_import
from __future__ import absolute_import, division, print_function
from . import utils
from . import model
# import evidence
# Authors: David Alexander, Lance Hepler
from __future__ import absolute_import, division, print_function
import logging, os.path
import ConsensusCore2 as cc, numpy as np
......@@ -11,7 +12,6 @@ from ..ResultCollector import ResultCollectorProcess, ResultCollectorThread
from GenomicConsensus.consensus import Consensus, ArrowConsensus, join
from GenomicConsensus.windows import kSpannedIntervals, holes, subWindow
from GenomicConsensus.variants import filterVariants, annotateVariants
from GenomicConsensus.arrow.evidence import ArrowEvidence
from GenomicConsensus.arrow import diploid
from GenomicConsensus.utils import die
......@@ -113,29 +113,6 @@ def consensusAndVariantsForWindow(alnFile, refWindow, referenceContig,
# characters and we only use IUPAC for *internal* bookkeeping.
if arrowConfig.polishDiploid:
css.sequence = newPureCss
# Dump?
maybeDumpEvidence = \
((options.dumpEvidence == "all") or
(options.dumpEvidence == "outliers") or
(options.dumpEvidence == "variants") and
(len(variants) > 0) and css.hasEvidence)
if maybeDumpEvidence:
refId, refStart, refEnd = subWin
refName = reference.idToName(refId)
windowDirectory = os.path.join(
options.evidenceDirectory,
refName,
"%d-%d" % (refStart, refEnd))
ev = ArrowEvidence.fromConsensus(css)
if options.dumpEvidence != "outliers":
ev.save(windowDirectory)
elif (np.max(ev.delta) > 20):
# Mathematically I don't think we should be seeing
# deltas > 6 in magnitude, but let's just restrict
# attention to truly bonkers outliers.
ev.save(windowDirectory)
else:
css = ArrowConsensus.noCallConsensus(arrowConfig.noEvidenceConsensus,
subWin, intRefSeq)
......
# Authors: David Alexander, Lance Hepler
from __future__ import absolute_import, division, print_function
from GenomicConsensus.arrow.utils import allSingleBaseMutations
from GenomicConsensus.variants import Variant
......
# Authors: David Alexander, Lance Hepler
__all__ = [ "ArrowEvidence" ]
import logging, os.path, numpy as np
from collections import namedtuple
from itertools import groupby
from bisect import bisect_left, bisect_right
from pbcore.io import FastaReader, FastaWriter
from .utils import scoreMatrix
from .. import reference
class ArrowEvidence(object):
    """
    The matrix of per-read mutation scores ("evidence") underlying an
    arrow consensus window, with support for saving to / loading from a
    directory dump.

    If used at runtime, this will require h5py
    (for storing/loading 'arrow-scores.h5')
    """

    # (Position, Type, FromBase, ToBase) parsed from a mutation name
    Mutation = namedtuple("Mutation", ("Position", "Type", "FromBase", "ToBase"))

    @staticmethod
    def _parseMutName(mutName):
        """
        Parse a mutation name of the form '<pos> <type> <from> -> <to>'
        (e.g. '42 Sub A -> C') into a Mutation namedtuple.
        """
        fields = mutName.split(" ")
        pos = int(fields[0])
        # fields[1:] == [type, fromBase, "->", toBase]; renamed to avoid
        # shadowing the `type` builtin.
        mutType, fromBase, _, toBase = fields[1:]
        return ArrowEvidence.Mutation(pos, mutType, fromBase, toBase)

    def __init__(self, refWindow, consensus, rowNames, colNames, baselineScores, scores):
        """
        refWindow      : tuple(refName, refStart, refEnd)
        consensus      : consensus sequence for the window (str)
        rowNames       : per-read row labels of `scores`
        colNames       : mutation-name column labels of `scores`
        baselineScores : per-read score of the unmutated template (1-D)
        scores         : per-read, per-mutation scores (2-D)
        """
        assert isinstance(consensus, str)
        self.refWindow = refWindow  # tuple(str, int, int)
        self.consensus = consensus
        self.rowNames = rowNames
        self.colNames = colNames
        self.baselineScores = baselineScores
        self.scores = scores
        # BUGFIX: materialize as a list.  Under Python 3 a bare `map`
        # object is exhausted after one traversal, and `positions`
        # (hence `forPosition`) reads this repeatedly.
        self.muts = [ArrowEvidence._parseMutName(name) for name in self.colNames]

    @staticmethod
    def fromConsensus(css):
        """Build an ArrowEvidence from an arrow consensus object `css`."""
        rowNames, colNames, baselineScores, scores = scoreMatrix(css.ai)
        return ArrowEvidence(css.refWindow,
                             css.sequence,
                             rowNames,
                             colNames,
                             baselineScores,
                             scores)

    @property
    def refName(self):
        return self.refWindow[0]

    @property
    def refStart(self):
        return self.refWindow[1]

    @property
    def refEnd(self):
        return self.refWindow[2]

    @property
    def positions(self):
        # Reference position of each score column (sorted, may repeat).
        return [ mut.Position for mut in self.muts ]

    @property
    def uniquePositions(self):
        return sorted(list(set(self.positions)))

    @property
    def delta(self):
        # Score change of each mutation relative to the unmutated template.
        return self.scores - self.baselineScores[:, np.newaxis]

    @staticmethod
    def load(dir):
        """
        Load an ArrowEvidence from a directory; the expected layout is
        the one produced by `save` (".../<refName>/<start>-<end>").
        """
        if dir.endswith("/"): dir = dir[:-1]
        refStart, refEnd = map(int, dir.split("/")[-1].split("-"))
        refName = dir.split("/")[-2]
        refWindow = (refName, refStart, refEnd)
        with FastaReader(dir + "/consensus.fa") as fr:
            consensus = next(iter(fr)).sequence
        import h5py
        with h5py.File(dir + "/arrow-scores.h5", "r") as f:
            # BUGFIX: `Dataset.value` was deprecated and removed in
            # h5py 3; `[()]` is the supported whole-dataset read.
            scores = f["Scores"][()]
            baselineScores = f["BaselineScores"][()]
            colNames = f["ColumnNames"][()]
            rowNames = f["RowNames"][()]
        return ArrowEvidence(refWindow, consensus,
                             rowNames, colNames,
                             baselineScores, scores)

    def save(self, dir):
        """
        Save this ArrowEvidence to a directory. The directory will be
        *created* by this method.
        Format of evidence dump:
        evidence_dump/
          ref000001/
            0-1005/
              consensus.fa
              arrow-scores.h5
            995-2005/
            ...
        """
        logging.info("Dumping evidence to %s" % (dir,))
        join = os.path.join
        if os.path.exists(dir):
            raise Exception("Evidence dump does not expect directory %s to exist." % dir)
        os.makedirs(dir)
        #refFasta = FastaWriter(join(dir, "reference.fa"))
        #readsFasta = FastaWriter(join(dir, "reads.fa"))
        consensusFasta = FastaWriter(join(dir, "consensus.fa"))
        windowName = self.refName + (":%d-%d" % (self.refStart, self.refEnd))
        #refFasta.writeRecord(windowName, self.refSequence)
        #refFasta.close()
        consensusFasta.writeRecord(windowName + "|arrow", self.consensus)
        consensusFasta.close()
        import h5py
        # BUGFIX: pass an explicit mode; h5py.File without one is
        # deprecated, and "w" is safe since `dir` was just created.
        arrowScoreFile = h5py.File(join(dir, "arrow-scores.h5"), "w")
        arrowScoreFile.create_dataset("Scores", data=self.scores)
        vlen_str = h5py.special_dtype(vlen=str)
        arrowScoreFile.create_dataset("RowNames", data=self.rowNames, dtype=vlen_str)
        arrowScoreFile.create_dataset("ColumnNames", data=self.colNames, dtype=vlen_str)
        arrowScoreFile.create_dataset("BaselineScores", data=self.baselineScores)
        arrowScoreFile.close()
        # for aln in alns:
        #     readsFasta.writeRecord(str(aln.rowNumber),
        #                            aln.read(orientation="genomic", aligned=False))
        # readsFasta.close()

    def forPosition(self, pos):
        """Restrict the evidence to the score columns at position `pos`."""
        # `positions` is sorted, so the columns for `pos` are contiguous.
        posStart = bisect_left(self.positions, pos)
        posEnd = bisect_right(self.positions, pos)
        # BUGFIX: the first constructor argument is the refWindow
        # *tuple*; the original erroneously passed self.refStart.
        return ArrowEvidence(self.refWindow,
                             self.consensus,
                             self.rowNames,
                             self.colNames[posStart:posEnd],
                             self.baselineScores,
                             self.scores[:, posStart:posEnd])

    def justSubstitutions(self):
        """Restrict the evidence to substitution columns only."""
        # BUGFIX: build the mask with a comprehension; np.array over a
        # `map` object yields a useless 0-d object array under Python 3.
        colMask = np.array(["Sub" in s for s in self.colNames])
        # BUGFIX: pass the refWindow tuple (see forPosition).
        return ArrowEvidence(self.refWindow,
                             self.consensus,
                             self.rowNames,
                             self.colNames[colMask],
                             self.baselineScores,
                             self.scores[:, colMask])

    def rowNumbers(self):
        # with FastaReader(self.dir + "/reads.fa") as fr:
        #     return [ int(ctg.name) for ctg in fr ]
        raise NotImplementedError
# Authors: David Alexander, Lance Hepler
from __future__ import absolute_import, division, print_function
import numpy as np, ConfigParser, collections, logging
from glob import glob
......
# Authors: David Alexander, Lance Hepler
from __future__ import absolute_import, division, print_function
import numpy as np, itertools, logging, re, sys
from collections import Counter
......@@ -406,7 +407,7 @@ def poaConsensus(fwdSequences, arrowConfig):
ordSeqs = sorted(fwdSequences, key=lambda seq: abs(len(seq) - median))
ordSeqs = ordSeqs[:arrowConfig.maxPoaCoverage]
cov = len(ordSeqs)
minCov = 1 if cov < 5 else ((cov + 1) / 2 - 1)
minCov = 1 if cov < 5 else ((cov + 1) // 2 - 1)
poaConfig = cc.DefaultPoaConfig(cc.AlignMode_GLOBAL)
return cc.PoaConsensus.FindConsensus(ordSeqs, poaConfig, minCov)
......
# Author: David Alexander
from __future__ import absolute_import, division, print_function
import numpy as np
......
# Author: David Alexander
from __future__ import absolute_import, division, print_function
import time
from pbcore.io import GffWriter, Gff3Record
......
# Author: Lance Hepler
from __future__ import division, print_function
from __future__ import absolute_import, division, print_function
import time
from textwrap import dedent
......
# Author: David Alexander
from __future__ import absolute_import
from __future__ import absolute_import, division, print_function
from .VariantsGffWriter import VariantsGffWriter
from .utils import *
# Author: David Alexander
from __future__ import absolute_import, division, print_function
__all__ = ["loadCmpH5", "loadBam"]
......
# Author: David Alexander
from __future__ import absolute_import
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import argparse, atexit, cProfile, gc, glob, logging, multiprocessing
import os, pstats, random, shutil, tempfile, time, threading, Queue, traceback, pprint
......@@ -201,11 +199,6 @@ class ToolRunner(object):
logging.info("Removing %s" % options.temporaryDirectory)
shutil.rmtree(options.temporaryDirectory, ignore_errors=True)
def _setupEvidenceDumpDirectory(self, directoryName):
if os.path.exists(directoryName):
shutil.rmtree(directoryName)
os.makedirs(directoryName)
@property
def aborting(self):
    """
    Read-only flag: whether this ToolRunner is aborting.

    NOTE(review): the setter of `_aborting` is not visible in this
    chunk — presumably set on failure/interrupt; confirm before relying
    on exact semantics.
    """
    return self._aborting
......@@ -252,8 +245,6 @@ class ToolRunner(object):
if options.pdbAtStartup:
ipdb.set_trace()
#logging.info("h5py version: %s" % h5py.version.version)
#logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
logging.info("ConsensusCore version: %s" %
(consensusCoreVersion() or "ConsensusCore unavailable"))
logging.info("ConsensusCore2 version: %s" %
......@@ -281,10 +272,6 @@ class ToolRunner(object):
#options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
#if options.disableHdf5ChunkCache:
# logging.info("Will disable HDF5 chunk cache (large number of datasets)")
#logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())
if options.dumpEvidence:
self._setupEvidenceDumpDirectory(options.evidenceDirectory)
self._launchSlaves()
self._readAlignmentInput()
......
......@@ -17,7 +17,7 @@
# > from options import options
# and get the loaded options dictionary.
#
from __future__ import absolute_import
from __future__ import absolute_import, division, print_function
import argparse, os, os.path, sys, json
from pbcommand.models import FileTypes, SymbolTypes, get_pbparser
......@@ -351,16 +351,6 @@ def add_options_to_argument_parser(parser):
dest="doProfiling",
default=False,
help="Enable Python-level profiling (using cProfile).")
debugging.add_argument(
"--dumpEvidence", "-d",
dest="dumpEvidence",
nargs="?",
default=None,
const="variants",
choices=["variants", "all", "outliers"])
debugging.add_argument(
"--evidenceDirectory",
default="evidence_dump")
debugging.add_argument(
"--annotateGFF",
action="store_true",
......
# Author: David Alexander
from __future__ import absolute_import, division, print_function
# Author: David Alexander
from __future__ import absolute_import
from __future__ import absolute_import, division, print_function
import math, logging, numpy as np, random
from itertools import izip
......
# Author: David Alexander
from __future__ import absolute_import, division, print_function