Skip to content
Commits on Source (7)
pbbarcode (0.8.0-6) UNRELEASED; urgency=medium
* Use 2to3 to port to Python3
Closes: #937253
* debhelper-compat 12
* Standards-Version: 4.4.1
* autopkgtest: s/ADTTMP/AUTOPKGTEST_TMP/g
* Set upstream metadata fields: Repository, Repository-Browse.
TODO: #938009 (python-pbcore)
-- Andreas Tille <tille@debian.org> Sat, 30 Nov 2019 17:59:51 +0100
pbbarcode (0.8.0-5) unstable; urgency=medium
* Team upload.
......
......@@ -2,18 +2,18 @@ Source: pbbarcode
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
Build-Depends: debhelper-compat (= 12),
dh-python,
python-setuptools,
python3-setuptools,
python3-docutils,
python-all-dev,
python-pbcore,
python-numpy,
python-h5py,
python3-all-dev,
python3-pbcore,
python3-numpy,
python3-h5py,
# Test-Depends:
python-nose,
python3-nose,
pbh5tools
Standards-Version: 4.2.1
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/pbbarcode
Vcs-Git: https://salsa.debian.org/med-team/pbbarcode.git
Homepage: https://github.com/PacificBiosciences/pbbarcode
......@@ -22,8 +22,8 @@ Package: pbbarcode
Architecture: any
Depends: ${misc:Depends},
${shlibs:Depends},
${python:Depends},
python-pkg-resources
${python3:Depends},
python3-pkg-resources
Suggests: pbdagcon
Description: annotate PacBio sequencing reads with barcode information
The pbbarcode package provides tools for annotating PacBio sequencing reads
......
Description: Use 2to3 to port to Python3
Bug-Debian: https://bugs.debian.org/937253
Author: Andreas Tille <tille@debian.org>
Last-Update: Sat, 30 Nov 2019 17:59:51 +0100
--- a/Makefile
+++ b/Makefile
@@ -5,17 +5,17 @@ SHELL = /bin/bash -e
all: build install
build:
- python setup.py build --executable="/usr/bin/env python"
+ python3 setup.py build --executable="/usr/bin/python3"
bdist:
- python setup.py build --executable="/usr/bin/env python"
- python setup.py bdist --formats=egg
+ python3 setup.py build --executable="/usr/bin/python3"
+ python3 setup.py bdist --formats=egg
install:
- python setup.py install
+ python3 setup.py install
develop:
- python setup.py develop
+ python3 setup.py develop
test:
find tests -name "*.py" | xargs nosetests
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -40,8 +40,8 @@ source_suffix = '.rst'
master_doc = 'index'
# General information about the project.
-project = u'pbbarcode'
-copyright = u'2012, PacBio'
+project = 'pbbarcode'
+copyright = '2012, PacBio'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -183,8 +183,8 @@ latex_elements = {
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ('index', 'pbbarcode.tex', u'pbbarcode Documentation',
- u'PacBio', 'manual'),
+ ('index', 'pbbarcode.tex', 'pbbarcode Documentation',
+ 'PacBio', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
@@ -213,8 +213,8 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- ('index', 'pbbarcode', u'pbbarcode Documentation',
- [u'PacBio'], 1)
+ ('index', 'pbbarcode', 'pbbarcode Documentation',
+ ['PacBio'], 1)
]
# If true, show URL addresses after external links.
@@ -227,8 +227,8 @@ man_pages = [
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- ('index', 'pbbarcode', u'pbbarcode Documentation',
- u'PacBio', 'pbbarcode', 'One line description of project.',
+ ('index', 'pbbarcode', 'pbbarcode Documentation',
+ 'PacBio', 'pbbarcode', 'One line description of project.',
'Miscellaneous'),
]
--- a/src/python/pbbarcode/BarcodeLabeler.py
+++ b/src/python/pbbarcode/BarcodeLabeler.py
@@ -35,8 +35,9 @@ import numpy as n
from pbcore.io.BarcodeH5Reader import LabeledZmw, \
BARCODE_DELIMITER
+from functools import reduce
-__RC_MAP__ = dict(zip('ACGTacgt-N','TGCAtgca-N'))
+__RC_MAP__ = dict(list(zip('ACGTacgt-N','TGCAtgca-N')))
class BarcodeScorer(object):
def __init__(self, basH5, barcodeFasta,
@@ -53,8 +54,7 @@ class BarcodeScorer(object):
self.basH5 = basH5
self.barcodeFasta = list(barcodeFasta)
self.aligner = Aligner.SWaligner()
- self.barcodeLength = n.unique(map(lambda x : len(x.sequence),
- self.barcodeFasta))
+ self.barcodeLength = n.unique([len(x.sequence) for x in self.barcodeFasta])
if len(self.barcodeLength) > 1:
raise Exception("Currently, all barcodes must be the same length.")
else:
@@ -96,7 +96,7 @@ class BarcodeScorer(object):
if self.scoreMode == 'paired':
return n.array([self.makeBCLabel(self.barcodeFasta[i].name,
self.barcodeFasta[i+1].name) for i
- in xrange(0, len(self.barcodeSeqs), 2)])
+ in range(0, len(self.barcodeSeqs), 2)])
else:
return n.array([self.makeBCLabel(x.name, x.name) for x in self.barcodeFasta])
@@ -193,7 +193,7 @@ class BarcodeScorer(object):
def choosePaired(o):
if o[1] == 1:
s = n.array([max(o[2][i], o[2][i + 1]) for i in \
- xrange(0, len(self.barcodeSeqs), 2)])
+ range(0, len(self.barcodeSeqs), 2)])
p = n.argsort(-s)
s = s[p]
else:
@@ -202,9 +202,9 @@ class BarcodeScorer(object):
# missed adapter will confuse this computation.
scores = o[3]
results = n.zeros(len(self.barcodeSeqs)/2)
- for i in xrange(0, len(self.barcodeSeqs), 2):
+ for i in range(0, len(self.barcodeSeqs), 2):
pths = [0,0]
- for j in xrange(0, len(scores)):
+ for j in range(0, len(scores)):
pths[j % 2] += scores[j][i]
pths[1 - j % 2] += scores[j][i + 1]
results[i/2] = max(pths)
--- a/src/python/pbbarcode/SWaligner.py
+++ b/src/python/pbbarcode/SWaligner.py
@@ -65,7 +65,7 @@ class SWaligner(object):
self.dpMat,
query,
targetSeqs)
- return numpy.array([scores[i] for i in xrange(0, len(scores))])
+ return numpy.array([scores[i] for i in range(0, len(scores))])
return scorer
--- a/src/python/pbbarcode/main.py
+++ b/src/python/pbbarcode/main.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
#################################################################################$$
# Copyright (c) 2011,2012, Pacific Biosciences of California, Inc.
#
@@ -55,6 +55,7 @@ from pbbarcode.BarcodeLabeler import *
from pbbarcode._version import __version__
from pbh5tools.CmpH5Utils import copyAttributes
+from functools import reduce
# Paths to the Barcode Datasets in the cmp.h5 file.
BC_ALN_INFO_DS = "AlnInfo/Barcode"
@@ -110,7 +111,7 @@ def makeBarcodeFofnFromBasFofn():
logging.debug("Using %d processes." % runner.args.nProcs)
if runner.args.nProcs <= 1:
- newFiles = map(mpWrapper, inFiles)
+ newFiles = list(map(mpWrapper, inFiles))
else:
pool = Pool(runner.args.nProcs)
newFiles = pool.map(mpWrapper, inFiles)
@@ -158,7 +159,7 @@ def labelAlignments():
# we use the first one to get the labels, if somehow they
# don't have all of the same stuff that will be an issue.
bcLabels = n.concatenate((bcFofn.barcodeLabels, n.array([BARCODE_DELIMITER])))
- H5.create_dataset(BC_INFO_ID, data = n.array(range(0, len(bcLabels))),
+ H5.create_dataset(BC_INFO_ID, data = n.array(list(range(0, len(bcLabels)))),
dtype = 'int32')
H5.create_dataset(BC_INFO_NAME, data = bcLabels, dtype = h5.new_vlen(str))
if BC_ALN_INFO_DS in H5:
@@ -180,17 +181,17 @@ def zipFofns(*inFofns):
return lines
sortedFofns = [readAndSort(inFofn) for inFofn in inFofns]
- l = map(len, sortedFofns)
+ l = list(map(len, sortedFofns))
if len(n.unique(l)) != 1:
raise Exception("Fofns don't match, unequal number of inputs.")
else:
- for i in xrange(0, n.unique(l)):
+ for i in range(0, n.unique(l)):
if len(n.unique([movieNameFromFile(sortedFofn[i]) for
sortedFofn in sortedFofns])) != 1:
raise Exception("Fofn elements don't match, movies differ.")
# need to un-arrayify these guys
- return zip(*map(list, sortedFofns))
+ return list(zip(*list(map(list, sortedFofns))))
def filterZmws(zmwsForBCs):
"""Apply various filterings passed by the user. There are somewhat
@@ -209,7 +210,7 @@ def filterZmws(zmwsForBCs):
def molLenGuess(zmw):
if zmw.baxH5.hasRawBasecalls:
- return max(map(len, zmw.subreads)) if zmw.subreads else 0
+ return max(list(map(len, zmw.subreads))) if zmw.subreads else 0
else:
return len(zmw.ccsRead) if zmw.ccsRead else 0
@@ -237,7 +238,7 @@ def filterZmws(zmwsForBCs):
else:
return True
- return { k:filter(zmwFilterFx, v) for k,v in zmwsForBCs.items() }
+ return { k:list(filter(zmwFilterFx, v)) for k,v in list(zmwsForBCs.items()) }
def _warnOnce():
var = []
@@ -276,18 +277,18 @@ def getFastqRecords(zmw, lZmw = None):
def getFastqs():
zmwsByBarcode = getZmwsForBarcodes()
logging.debug("Pre-filter: Average number of ZMWs per barcode: %d" %
- n.mean([len(zmwsByBarcode[k]) for k in zmwsByBarcode.keys()]))
+ n.mean([len(zmwsByBarcode[k]) for k in list(zmwsByBarcode.keys())]))
zmwsByBarcode = filterZmws(zmwsByBarcode)
logging.debug("Post-filter: Average number of ZMWs per barcode: %d" %
- n.mean([len(zmwsByBarcode[k]) for k in zmwsByBarcode.keys()]))
+ n.mean([len(zmwsByBarcode[k]) for k in list(zmwsByBarcode.keys())]))
def getReadData(zmws):
recs = [getFastqRecords(zmw,lZmw) for zmw,lZmw in zmws]
- recs = filter(lambda x : x, recs)
+ recs = [x for x in recs if x]
return [elt for sublst in recs for elt in sublst]
- return {k:getReadData(zmws) for k, zmws in zmwsByBarcode.iteritems()}
+ return {k:getReadData(zmws) for k, zmws in zmwsByBarcode.items()}
def emitFastqs():
outFiles = getFastqs()
@@ -309,7 +310,7 @@ def emitFastqs():
record = FastqRecord
l = 'a' if runner.args.fasta else 'q'
- for k in outFiles.keys():
+ for k in list(outFiles.keys()):
if outFiles[k]:
with writer("%s/%s.fast%s" % (runner.args.outDir, k, l)) as w:
for e in outFiles[k]:
@@ -328,7 +329,7 @@ def getUnlabeledZmws():
basH5 = BasH5Reader(basFile)
bcH5 = BarcodeH5Reader(barcodeFile)
sdiff = basH5.sequencingZmws[~n.in1d(basH5.sequencingZmws,
- bcH5.labeledZmws.keys())]
+ list(bcH5.labeledZmws.keys()))]
for hn in sdiff:
unlabeledZmws.append(basH5[hn])
@@ -351,7 +352,7 @@ def getZmwsForBarcodes(labels = None):
lZmws = bcH5.labeledZmwsFromBarcodeLabel(label)
for lZmw in lZmws:
zmw = basH5[lZmw.holeNumber]
- if not label in zmwsForBCs.keys():
+ if not label in list(zmwsForBCs.keys()):
zmwsForBCs[label] = []
zmwsForBCs[label].append((zmw, lZmw))
@@ -424,13 +425,13 @@ def subsampleReads(e):
k = int(len(e)*runner.args.subsample)
else:
k = len(e)
- i = n.array(random.sample(range(0, len(e)), k), dtype = int)
+ i = n.array(random.sample(list(range(0, len(e))), k), dtype = int)
logging.debug("subsampled down to: %d" % len(i))
return [e[j] for j in i]
def callConsensus():
def makeReadAndReads(zmwsForBC):
- ccsData = filter(lambda x:x, [zmw.ccsRead for _,_,zmw in zmwsForBC if zmw])
+ ccsData = [x for x in [zmw.ccsRead for _,_,zmw in zmwsForBC if zmw] if x]
srData = reduce(lambda x,y : x+y, [zmw.subreads for zmw,_,_ in
zmwsForBC if zmw], [])
if not srData and not ccsData:
@@ -438,7 +439,7 @@ def callConsensus():
def getSeedRead(reads, lq = 80, uq = 90,
sLambda = lambda x : -x.zmw.readScore):
- lens = map(len, reads)
+ lens = list(map(len, reads))
candidateRange = (n.percentile(lens, lq),
n.percentile(lens, uq))
pfReads = [read for read,l in zip(reads, lens) if
@@ -476,16 +477,16 @@ def callConsensus():
zmwsForBCs = getZmwsForBarcodes()
# subsample
- zmwsForBCs = {k:subsampleReads(v) for k,v in zmwsForBCs.items()}
+ zmwsForBCs = {k:subsampleReads(v) for k,v in list(zmwsForBCs.items())}
logging.info("unfiltered average zmws per barcode: %g" %
- n.round(n.mean(map(len, zmwsForBCs.values()))))
+ n.round(n.mean(list(map(len, list(zmwsForBCs.values()))))))
# filter ZMWs
zmwsForBCs = filterZmws(zmwsForBCs)
logging.info("filtered average zmws per barcode: %g" %
- n.round(n.mean(map(len, zmwsForBCs.values()))))
+ n.round(n.mean(list(map(len, list(zmwsForBCs.values()))))))
# now choose the best subread to seed the assembly
if runner.args.ccsFofn:
@@ -496,7 +497,7 @@ def callConsensus():
open(runner.args.ccsFofn).read().splitlines()}
# fill in the CCS spot.
- for k,v in zmwsForBCs.items():
+ for k,v in list(zmwsForBCs.items()):
l = []
for zmw,lZmw in v:
r = ccsReaders[movieNameFromFile(zmw.baxH5.file.filename)]
@@ -505,17 +506,17 @@ def callConsensus():
else:
# add none to the CCS spot.
zmwsForBCs = {k:[(zmw,lZmw,None) for zmw,lZmw in v]
- for k,v in zmwsForBCs.iteritems()}
+ for k,v in zmwsForBCs.items()}
- readAndReads = { k:makeReadAndReads(v) for k,v in zmwsForBCs.items() }
+ readAndReads = { k:makeReadAndReads(v) for k,v in list(zmwsForBCs.items()) }
# remove barcodes that don't have a seed read and a set of useable reads.
- readAndReads = { k:v for k,v in readAndReads.items() if v[0] and v[1] }
+ readAndReads = { k:v for k,v in list(readAndReads.items()) if v[0] and v[1] }
# generate FASTA files
outDir = runner.args.outDir
- for barcode, reads in readAndReads.items():
+ for barcode, reads in list(readAndReads.items()):
bcdir = '/'.join((outDir, barcode))
if not os.path.exists(bcdir):
os.makedirs(bcdir)
@@ -537,8 +538,7 @@ def callConsensus():
for inFof, in zipFofns(runner.args.inputFofn):
bh5 = BaxH5Reader(inFof)
reg = bh5.file['/PulseData/Regions']
- inMovie = filter(lambda z : z.baxH5.movieName == bh5.movieName,
- subreads)
+ inMovie = [z for z in subreads if z.baxH5.movieName == bh5.movieName]
holes = n.in1d(reg[:,0], n.array([a.holeNumber for a in inMovie]))
if any(holes):
nreg = reg[holes,:]
@@ -558,12 +558,12 @@ def callConsensus():
ofile.close()
## call gcon
- outDirs = [ (outDir, k) for k in readAndReads.keys() ]
+ outDirs = [ (outDir, k) for k in list(readAndReads.keys()) ]
if runner.args.nProcs == 1:
- outFasta = filter(lambda z: z, map(gconFunc, outDirs))
+ outFasta = [z for z in map(gconFunc, outDirs) if z]
else:
pool = Pool(runner.args.nProcs)
- outFasta = filter(lambda z : z, pool.map(gconFunc, outDirs))
+ outFasta = [z for z in pool.map(gconFunc, outDirs) if z]
## write the results
with FastaWriter('/'.join((outDir, "consensus.fa"))) as w:
@@ -572,7 +572,7 @@ def callConsensus():
## optionally cleanup
if not runner.args.keepTmpDir:
- for barcode, reads in readAndReads.items():
+ for barcode, reads in list(readAndReads.items()):
bcdir = '/'.join((outDir, barcode))
shutil.rmtree(bcdir)
--- a/tests/cram/consensus.t.disabled
+++ b/tests/cram/consensus.t.disabled
@@ -1,6 +1,6 @@
- $ export INH5=`python -c "from pbcore import data ; print data.getCmpH5()"`
- $ export INBH51=`python -c "from pbcore import data ; print data.geBasH5s[0]"`
- $ export INBH52=`python -c "from pbcore import data ; print data.getBasH5s[1]"`
+ $ export INH5=`python3 -c "from pbcore import data ; print(data.getCmpH5())"`
+ $ export INBH51=`python3 -c "from pbcore import data ; print(data.getBasH5s()[0])"`
+ $ export INBH52=`python3 -c "from pbcore import data ; print(data.getBasH5s()[1])"`
$ export BARCODE_FASTA=$TESTDIR/../../etc/barcode.fasta
$ echo $INBH51 > bas.fofn
$ echo $INBH52 >> bas.fofn
--- a/tests/cram/sanity.t
+++ b/tests/cram/sanity.t
@@ -1,6 +1,6 @@
- $ export INH5=`python -c "from pbcore import data ; print data.getCmpH5()"`
- $ export INBH51=`python -c "from pbcore import data ; print data.getBasH5s()[0]"`
- $ export INBH52=`python -c "from pbcore import data ; print data.getBasH5s()[1]"`
+ $ export INH5=`python3 -c "from pbcore import data ; print(data.getCmpH5())"`
+ $ export INBH51=`python3 -c "from pbcore import data ; print(data.getBasH5s()[0])"`
+ $ export INBH52=`python3 -c "from pbcore import data ; print(data.getBasH5s()[1])"`
$ export BARCODE_FASTA=$TESTDIR/../../etc/barcode.fasta
$ echo $INBH51 > bas.fofn
$ echo $INBH52 >> bas.fofn
format-manpage.patch
module-multiarch-path.patch
spelling.patch
2to3.patch
......@@ -9,12 +9,12 @@ include /usr/share/dpkg/default.mk
PYBUILD_NAME = pbbarcode
%:
LC_ALL=C.UTF-8 dh $@ --with python2 --buildsystem=pybuild
LC_ALL=C.UTF-8 dh $@ --with python3 --buildsystem=pybuild
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
PYBUILD_SYSTEM=custom \
PYBUILD_TEST_ARGS="find tests -name '*.py' | xargs nosetests -v" \
PYBUILD_TEST_ARGS="find tests -name '*.py' | xargs nosetests3 -v" \
dh_auto_test
endif
......
Tests: run-unit-test
Depends: @, pbh5tools, python-nose, python-cram
Depends: @, pbh5tools, python3-nose, python3-cram
Restrictions: allow-stderr
......@@ -2,15 +2,15 @@
pkg=pbbarcode
if [ "$ADTTMP" = "" ] ; then
ADTTMP=`mktemp -d /tmp/${pkg}-test.XXXXXX`
trap "rm -rf $ADTTMP" 0 INT QUIT ABRT PIPE TERM
if [ "$AUTOPKGTEST_TMP" = "" ] ; then
AUTOPKGTEST_TMP=`mktemp -d /tmp/${pkg}-test.XXXXXX`
trap "rm -rf $AUTOPKGTEST_TMP" 0 INT QUIT ABRT PIPE TERM
fi
cp -a /usr/share/doc/${pkg}/etc $ADTTMP
cp -a /usr/share/doc/${pkg}/tests $ADTTMP
cp -a /usr/share/doc/${pkg}/etc $AUTOPKGTEST_TMP
cp -a /usr/share/doc/${pkg}/tests $AUTOPKGTEST_TMP
cd $ADTTMP
cd $AUTOPKGTEST_TMP
find . -name "*.gz" -exec gunzip \{\} \;
find tests -name "*.py" | xargs nosetests
......@@ -22,8 +22,8 @@ find tests/cram -name "*.t" | grep -v consensus.t | xargs cram --verbose
# I have tried the very first part of this test which translates to
cd etc
python -c "from pbcore import data ; print data.getBasH5s()[0]" > bas.fofn
python -c "from pbcore import data ; print data.getBasH5s()[1]" >> bas.fofn
python3 -c "from pbcore import data ; print(data.getBasH5s()[0])" > bas.fofn
python3 -c "from pbcore import data ; print(data.getBasH5s()[1])" >> bas.fofn
pbbarcode labelZmws barcode.fasta bas.fofn
# and even this takes long enough that I decided to kill the process
# So probably this is nothing we should push to the autopkgtest infrastructure
......
......@@ -5,3 +5,5 @@ Registry:
Entry: OMICS_18566
- Name: SciCrunch
Entry: NA
Repository: https://github.com/PacificBiosciences/pbbarcode
Repository-Browse: https://github.com/PacificBiosciences/pbbarcode