--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,17 @@
+uc-echo (1.12-12) unstable; urgency=medium
+
+  * Use 2to3 to port to Python3
+    Closes: #938741
+  * debhelper-compat 12
+  * Standards-Version: 4.4.0
+  * Remove trailing whitespace in debian/changelog
+  * Remove trailing whitespace in debian/control
+  * Remove trailing whitespace in debian/rules
+  * Trim trailing whitespace.
+  * Use secure URI in debian/watch.
+
+ -- Andreas Tille <tille@debian.org>  Mon, 02 Sep 2019 21:24:31 +0200
+
 uc-echo (1.12-11) unstable; urgency=medium
 
   * Team upload.
@@ -89,7 +103,7 @@ uc-echo (1.12-2) unstable; urgency=low
   * debian/control: use g++-multilib only when available (Closes: #727657)
   * debian/control: use canonical vcs-fields
-  * add patch to compile on mips/mipsel 
+  * add patch to compile on mips/mipsel
 
  -- Thorsten Alteholz <debian@alteholz.de>  Tue, 29 Oct 2013 19:00:07 +0100
@@ -98,4 +112,3 @@ uc-echo (1.12-1) unstable; urgency=low
   * initial version (Closes: #704902)
 
  -- Thorsten Alteholz <debian@alteholz.de>  Tue, 12 Mar 2013 18:00:07 +0100
-
--- a/debian/control
+++ b/debian/control
@@ -5,13 +5,12 @@ Uploaders: Thorsten Alteholz <debian@alteholz.de>,
            Navid Fehrenbacher <nf@q-leap.de>
 Section: science
 Priority: optional
-Build-Depends: debhelper (>= 11~),
-               python-all-dev:any,
-               libpython-all-dev,
-               python-scipy:native,
-               python-numpy:native,
+Build-Depends: debhelper-compat (= 12),
+               python3-all-dev:any,
+               python3-scipy:native,
+               python3-numpy:native,
                g++-multilib [i386 powerpc sparc x32]
-Standards-Version: 4.3.0
+Standards-Version: 4.4.0
 Vcs-Browser: https://salsa.debian.org/med-team/uc-echo
 Vcs-Git: https://salsa.debian.org/med-team/uc-echo.git
 Homepage: http://uc-echo.sourceforge.net/
@@ -20,11 +19,11 @@ Package: uc-echo
 Architecture: any
 Depends: ${shlibs:Depends},
          ${misc:Depends},
-         ${python:Depends},
-         python-scipy
+         ${python3:Depends},
+         python3-scipy
 Description: error correction algorithm designed for short-reads from NGS
- ECHO is an error correction algorithm designed for short-reads 
- from next-generation sequencing platforms such as Illumina's 
- Genome Analyzer II. The algorithm uses a Bayesian framework to 
- improve the quality of the reads in a given data set by employing 
+ ECHO is an error correction algorithm designed for short-reads
+ from next-generation sequencing platforms such as Illumina's
+ Genome Analyzer II. The algorithm uses a Bayesian framework to
+ improve the quality of the reads in a given data set by employing
  maximum a posteriori estimation.
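
For orientation only: a toy sketch of the "maximum a posteriori" correction the
long description refers to, in the package's own language (Python 3). This is
not ECHO's implementation; the uniform prior, the symmetric per-base error
rate, and the neighbor-vote input are illustrative assumptions.

    from collections import Counter

    def map_base_call(votes, error_rate=0.01):
        # votes: bases observed at one alignment position across the
        # overlapping neighbor reads, e.g. ["A", "A", "C", "A"].
        counts = Counter(votes)
        posterior = {}
        for base in "ACGT":
            p = 1.0  # uniform prior over A/C/G/T, so posterior ~ likelihood
            for observed, n in counts.items():
                # symmetric error model: a wrong base is misread uniformly
                lik = (1.0 - error_rate) if observed == base else (error_rate / 3.0)
                p *= lik ** n
            posterior[base] = p
        return max(posterior, key=posterior.get)

    print(map_base_call(["A", "A", "C", "A"]))  # -> "A"

A real run additionally weights votes by quality scores and searches over the
error-model parameters (the paramh/parame values visible in the
ErrorCorrection.py hunks below).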
Description: Use 2to3 to port to Python3
Bug-Debian: https://bugs.debian.org/938741
Author: Andreas Tille <tille@debian.org>
Last-Update: Mon, 02 Sep 2019 21:21:16 +0200
--- a/ErrorCorrection.py
+++ b/ErrorCorrection.py
@@ -1,3 +1,5 @@
+#!/usr/bin/python3
+
 import subprocess
 import threading
 import itertools
@@ -82,7 +84,7 @@ class LogMgr:
         self.logfile.write("%s\n"%output_str)
         self.logfile.flush()
         if Verbose or console_output:
-            print output_str + "\n"
+            print(output_str + "\n")
 
 ##########################
@@ -110,7 +112,7 @@ class CmdExecuter:
         proc = subprocess.Popen(cmd, bufsize=10240, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = proc.communicate()
         if stdout != "":
-            print stdout
+            print(stdout)
         returncode = proc.returncode
         if stderr is not None and len(stderr)>0:
             MsgLogger.log("ERROR: " + stderr, True)
@@ -174,7 +176,7 @@ def poiTotalVar(hist):
         poi_pmf = poisson.pmf(arange(len(hist)), m)
         residual_mass = 1.0 - sum(poi_pmf)
-        total_var = 0.5*sum( abs(p1-p2) for p1, p2 in itertools.izip(emp_pmf, poi_pmf)) + 0.5*residual_mass
+        total_var = 0.5*sum( abs(p1-p2) for p1, p2 in zip(emp_pmf, poi_pmf)) + 0.5*residual_mass
 
         if total_var < est_tv:
             est_tv = total_var
@@ -302,7 +304,7 @@ def Hashing(cmdexecuter, target_st, targ
         HashFiles[0].append(os.path.join(TmpDIR, "%d.hash"%(read_st)))
         IndexFiles[0].append(os.path.join(TmpDIR, "%d.index"%(read_st)))
         assert(len(HashFiles) == len(IndexFiles))
-        for i in xrange(len(HashFiles)):
+        for i in range(len(HashFiles)):
             assert(len(HashFiles[i]) == len(IndexFiles[i]))
             if len(HashFiles[i])>=ReadMergeBatchSize or (len(HashFiles[i])>=1 and cur_hash_file==tot_num_hash_files-1):
                 #cmdexecuter.wait()
@@ -311,7 +313,7 @@ def Hashing(cmdexecuter, target_st, targ
                     IndexFiles.append([])
             # Run appropriate merging
             if len(HashFiles[i]) > 1:
-                inputFNames = list(itertools.chain(*zip(HashFiles[i], IndexFiles[i])))
+                inputFNames = list(itertools.chain(*list(zip(HashFiles[i], IndexFiles[i]))))
                 cmdexecuter.exeCmd(makeHashMergeCmd(inputFNames, TmpDIR, "t_%d_%d"%(i+1, len(HashFiles[i+1]))))
                 cmdexecuter.wait()
             elif len(HashFiles[i]) == 1:
@@ -339,10 +341,10 @@
 def Neighboring(cmdexecuter, target_st, target_ed, DataBlockSize, nData, paramK, paramh, parame, maxCov, NHashFile, nKmers, param):
     # DataBlockSize should be nData (the entire data set)
     CachedBlocks = set()
-    for read_st in xrange(target_st, target_ed, DataBlockSize):
+    for read_st in range(target_st, target_ed, DataBlockSize):
         #read_ed = min(read_st + DataBlockSize, nData)
         read_ed = min(target_ed, nData)
-        for read_st2 in xrange(0, nData, DataBlockSize):
+        for read_st2 in range(0, nData, DataBlockSize):
             read_ed2 = min(read_st2 + DataBlockSize, nData)
 
             if read_st<=read_st2:
@@ -357,7 +359,7 @@ def Neighboring(cmdexecuter, target_st,
     # Create empty neighbor file
     NeighborFiles = [[]]
-    for hashfile in xrange(NHashFile):
+    for hashfile in range(NHashFile):
         # NeighborJoin for each hash block
         ihash_st = hashfile*nKmers/NHashFile
         ihash_ed = (hashfile+1)*nKmers/NHashFile
@@ -373,7 +375,7 @@ def Neighboring(cmdexecuter, target_st,
             # NeighborMerge
             # Merge together adjacency lists into one main adjacency list
             NeighborFiles[0].append(os.path.join(TmpDIR, "neighbors_%d_%d_%d.list"%(hashfile, st, st2)))
-            for i in xrange(len(NeighborFiles)):
+            for i in range(len(NeighborFiles)):
                 if len(NeighborFiles[i])>=HashMergeBatchSize or (len(NeighborFiles[i])>=1 and hashfile==NHashFile-1):
                     # Extend list if another level of "tree" is added
                     if len(NeighborFiles)==i+1:
@@ -461,7 +463,7 @@ if __name__ == '__main__':
     # Check for existence of input file
     if not os.path.isfile(OrigReadFName):
-        print "ERROR: Input file " + OrigReadFName + " does not exist."
+        print("ERROR: Input file " + OrigReadFName + " does not exist.")
         sys.exit(1)
 
     # Create log directory
@@ -619,10 +621,10 @@ if __name__ == '__main__':
     fin = file_open_function(OrigReadFName, "r")
     try:
         while True:
-            line0 = fin.next()
-            line1 = fin.next()
-            line2 = fin.next()
-            line3 = fin.next()
+            line0 = next(fin)
+            line1 = next(fin)
+            line2 = next(fin)
+            line3 = next(fin)
 
             # Quick sanity checks to ensure FASTQ format
             assert(line0[0] == "@")
@@ -668,7 +670,7 @@ if __name__ == '__main__':
     ModelSelectionSetSize = min(options.msize, nData)
     random.seed(1)
-    SeqOrdering = range(nData)
+    SeqOrdering = list(range(nData))
     random.shuffle(SeqOrdering)
 
     with open(TmpFile.name, "r") as fin:
@@ -677,7 +679,7 @@ if __name__ == '__main__':
             for idx in SeqOrdering:
                 fout.write(struct.pack("Q", ReadSt[idx]))
 
-            for line, id in itertools.izip(fin, itertools.count()):
+            for line, id in zip(fin, itertools.count()):
                 line = line.strip()
                 if id%2==0:
                     fout.write("Y%s\x00"%line)
@@ -740,7 +742,7 @@ if __name__ == '__main__':
         nKmers = int(struct.unpack("@I", tmpfile.read(4))[0])
         if nKmers < 0:
-            print "ERROR: Number of kmers in " + os.path.join(TmpDIR, "all.index") + " is not valid."
+            print("ERROR: Number of kmers in " + os.path.join(TmpDIR, "all.index") + " is not valid.")
             sys.exit(1)
 
 ##########################
@@ -886,7 +888,7 @@ if __name__ == '__main__':
     for read_st in range(0, nData, nData):
         with open(os.path.join(TmpDIR, "output_%d_%d_%f.txt"%(read_st, best_paramh, best_parame)), "r") as fin:
             with open(os.path.join(TmpDIR, "quality_%d_%d_%f.txt"%(read_st, best_paramh, best_parame)), "r") as qualfin:
-                for line, qualline in itertools.izip(fin,qualfin):
+                for line, qualline in zip(fin,qualfin):
                     readmap[ReadSt[SeqOrdering[cur_read_order]]:(ReadSt[SeqOrdering[cur_read_order]]+ReadLen[SeqOrdering[cur_read_order]]+1)] = line.strip() + '\n'
                     qualmap[ReadSt[SeqOrdering[cur_read_order]]:(ReadSt[SeqOrdering[cur_read_order]]+ReadLen[SeqOrdering[cur_read_order]]+1)] = qualline.strip() + '\n'
                     cur_read_order += 1
@@ -898,14 +900,14 @@ if __name__ == '__main__':
     # Eliminate reverse complement reads
     with open(OutputFName+".seq", "w+b") as fout:
         with open(TmpFile.name, "r") as fin:
-            for (line, counter) in itertools.izip(fin, itertools.count()):
+            for (line, counter) in zip(fin, itertools.count()):
                 if counter%2==0:
                     fout.write(line)
     os.unlink(TmpFile.name)
 
     with open(OutputFName+".qual", "w+b") as fout:
         with open(QualTmpFile.name, "r") as fin:
-            for (line, counter) in itertools.izip(fin, itertools.count()):
+            for (line, counter) in zip(fin, itertools.count()):
                 if counter%2==0:
                     fout.write(line)
     os.unlink(QualTmpFile.name)
@@ -913,7 +915,7 @@ if __name__ == '__main__':
     with open(OutputFName, "w+b") as fout:
         with open(OutputFName+".seq", "r") as fseq:
             with open(OutputFName+".qual", "r") as fqual:
-                for (seq, qual, id) in itertools.izip(fseq, fqual, itertools.count()):
+                for (seq, qual, id) in zip(fseq, fqual, itertools.count()):
                     fout.write("@%d\n%s\n+\n%s\n"%(id, seq.strip(), qual.strip()))
 
     if not KeepAllFiles:
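
Every change in the hunks above is a stock 2to3 rewrite, matching the changelog
entry "Use 2to3 to port to Python3": the print statement becomes the print()
function, xrange becomes range, itertools.izip becomes the builtin zip, and
fin.next() becomes next(fin); running something like "2to3 -w ErrorCorrection.py"
produces exactly these categories of edits. The one subtlety is that range()
and zip() return lazy iterators in Python 3, which is why 2to3 wraps them in
list() wherever the old code needed a real list. A self-contained illustration
(nData, HashFiles and IndexFiles are sample values standing in for the script's
variables):

    import itertools
    import random

    nData = 4
    HashFiles = [["a.hash", "b.hash"]]
    IndexFiles = [["a.index", "b.index"]]

    # A bare range() cannot be shuffled in place; 2to3 adds list() for us.
    SeqOrdering = list(range(nData))
    random.shuffle(SeqOrdering)

    # Interleave hash/index file names as the Hashing() hunk does; the
    # inner list() materializes the lazy zip() before unpacking it.
    inputFNames = list(itertools.chain(*list(zip(HashFiles[0], IndexFiles[0]))))
    print(inputFNames)  # ['a.hash', 'a.index', 'b.hash', 'b.index']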
--- a/README
+++ b/README
@@ -26,7 +26,7 @@ Quickstart:
    > make
 
 2. In the same directory, type:
-   > python ErrorCorrection.py -o output/sample_data.fastq sample_data.txt
+   > ErrorCorrection.py -o output/sample_data.fastq sample_data.txt
    - sample_data.txt is the input file.
    - output/sample_data.fastq is the output file name.
@@ -43,7 +43,7 @@ Quickstart:
 4. The output file sample_data.fastq.seq can be compared to sample_answer.txt, which contains the original reads without errors. There will be differences between sample_data.fastq.seq and sample_answer.txt because not every error is corrected by the error correction.
 
 5. To run with 4 threads, type
-   > python ErrorCorrection.py --ncpu 4 -o output/sample_data.fastq sample_data.txt
+   > ErrorCorrection.py --ncpu 4 -o output/sample_data.fastq sample_data.txt
 
 Installation:
 ------------
@@ -74,12 +74,12 @@ Note that the parsing method used by ECH
 The output file format of ECHO is standard FASTQ format.
 
 To run ECHO, use the following command
-> python ErrorCorrection.py -o output.fastq input.txt
+> ErrorCorrection.py -o output.fastq input.txt
 
 NOTE: The above command will generally work on relatively small data set sizes. The exact size depends on the amount of avaiable RAM. Please read the below 'Notes on Memory Usage' for additional details on running ECHO on larger data sets.
 
 Help messages for additional options can be found by using
-> python ErrorCorrection.py -h
+> ErrorCorrection.py -h
 
 Notes on Memory Usage:
 ---------------------
@@ -96,10 +96,10 @@ These parameters are set on the command
 Examples:
 For 5M reads, the following will typically run well with 8GB memory:
-> python ErrorCorrection.py -b 2000000 --nh 256 -o output/5Mreads.fastq 5Mreads.txt
+> ErrorCorrection.py -b 2000000 --nh 256 -o output/5Mreads.fastq 5Mreads.txt
 
 For 20M reads, the following will typically run well with 8GB memory:
-> python ErrorCorrection.py -b 2000000 --nh 1024 -o output/20Mreads.fastq 20Mreads.txt
+> ErrorCorrection.py -b 2000000 --nh 1024 -o output/20Mreads.fastq 20Mreads.txt
 
 Using Multiple Threads:
 ----------------------
Using Multiple Threads:
----------------------
@@ -107,7 +107,7 @@ Use the --ncpu command line option to se
Example:
To use 6 threads at once:
-> python ErrorCorrection.py -b 2000000 --nh 256 --ncpu 6 -o output/5Mreads.fastq 5Mreads.txt
+> ErrorCorrection.py -b 2000000 --nh 256 --ncpu 6 -o output/5Mreads.fastq 5Mreads.txt
Log files:
---------
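
The memory notes above pin down only two data points: 5M reads run well with
"-b 2000000 --nh 256" and 20M reads with "-b 2000000 --nh 1024", both in 8GB of
RAM. Purely as an illustration of that scaling (a hypothetical helper, not an
official formula), one could keep -b fixed and grow --nh roughly linearly with
the read count, rounded up to a power of two:

    def suggest_echo_args(n_reads, block_size=2_000_000):
        # Rough heuristic read off the README's two examples:
        # ~256 hash files per 5M reads, power-of-two --nh values.
        nh = 1
        while nh < 256 * max(1, n_reads / 5_000_000):
            nh *= 2
        return ["-b", str(block_size), "--nh", str(nh)]

    print(suggest_echo_args(20_000_000))  # ['-b', '2000000', '--nh', '1024']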
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,4 @@
 hardening-flags.patch
 include.patch
 m64-flag.patch
+2to3.patch
--- a/debian/rules
+++ b/debian/rules
@@ -9,7 +9,7 @@ CPATH=/usr/include/$(DEB_HOST_MULTIARCH)
 export LIBRARY_PATH CPATH
 
 ADDCXXFLAGS = -m64
 ifeq ($(DEB_HOST_ARCH),$(filter $(DEB_HOST_ARCH),mips mipsel mipsn32 mipsn32el mips64 mips64el hppa kfreebsd-i386 hurd-i386 armel armhf arm64 ia64 sh4 powerpcspe m68k))
-ADDCXXFLAGS = 
+ADDCXXFLAGS =
 endif
 CXXFLAGS += $(ADDCXXFLAGS)
@@ -17,7 +17,7 @@ CXXFLAGS += $(ADDCXXFLAGS)
 export DEB_BUILD_MAINT_OPTIONS=hardening=+all
 
 %:
-	dh $@ --with python2
+	dh $@ --with python3
 
 override_dh_auto_build:
 	echo "I: DEB_HOST_ARCH: $(DEB_HOST_ARCH)"
 #!/bin/bash
-python /usr/lib/uc-echo/ErrorCorrection.py $@
+/usr/lib/uc-echo/ErrorCorrection.py $@
--- a/debian/watch
+++ b/debian/watch
@@ -1,3 +1,3 @@
 version=3
 opts="uversionmangle=s/_/./" \
-http://sf.net/uc-echo/echo_v([\d_]+)\.tgz
+https://sf.net/uc-echo/echo_v([\d_]+)\.tgz
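
The uversionmangle rule rewrites an upstream version such as "1_12" (captured
by the ([\d_]+) group from a tarball name like echo_v1_12.tgz) into the dotted
"1.12" that Debian version comparison expects; uscan applies the Perl
substitution s/_/./ to the captured group. The same transformation sketched in
Python for illustration (count=1 mirrors the non-global s///):

    import re

    m = re.search(r"echo_v([\d_]+)\.tgz", "echo_v1_12.tgz")
    upstream = m.group(1)                        # "1_12"
    print(re.sub(r"_", ".", upstream, count=1))  # "1.12"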