Skip to content
Commits on Source (3)
gasic for Debian
================
The run_mappers.py script contained code relying on SeQan 1. This was replaced
by SeQan 2 in Debian which does not contain the executable mason any more. The
original command:
mason illumina -N 10000 -hi 0 -hs 0 -n 72 -sq -o <out> <ref>
was replaced by
mason_simulator illumina --illumina-read-length 10000 -o <out> <ref>
It is not sure whether this provides comparable results. If you have a better
suggestion please write a bug report via
reportbug gasic
-- Andreas Tille <tille@debian.org> Tue, 08 Oct 2019 15:01:07 +0200
gasic (0.0.r19-5) UNRELEASED; urgency=medium
* Use 2to3 to port to Python3
Closes: #936582
* In run_mappers.py the executable mason from seqan-apps version 1.x
was called. This is not available any more in seqan-apps 2.x and
thus it was replaced by mason_simulator. Hope that's OK.
-- Andreas Tille <tille@debian.org> Tue, 08 Oct 2019 15:01:07 +0200
gasic (0.0.r19-4) unstable; urgency=medium
* debhelper 11
......
......@@ -6,7 +6,8 @@ Uploaders: Andreas Tille <tille@debian.org>,
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
python-all-dev,
dh-python,
python3,
bowtie2
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/gasic
......@@ -16,12 +17,12 @@ Homepage: http://sourceforge.net/projects/gasic/
Package: gasic
Architecture: any
Depends: ${misc:Depends},
${python:Depends},
python-scipy,
python-numpy,
python-biopython,
python-pysam,
python-matplotlib,
${python3:Depends},
python3-scipy,
python3-numpy,
python3-biopython,
python3-pysam,
python3-matplotlib,
bowtie,
bowtie2,
bwa,
......
Description: Use 2to3 to port to Python3
Bug-Debian: https://bugs.debian.org/936582
Author: Andreas Tille <tille@debian.org>
Last-Update: Tue, 08 Oct 2019 15:01:07 +0200
--- a/core/gasic.py
+++ b/core/gasic.py
@@ -46,7 +46,7 @@ def similarity_correction(sim, reads, N)
A = sim
r = reads.astype(np.float) / N
- rng = range(len(reads))
+ rng = list(range(len(reads)))
# Now solve the optimization problem: min_c |Ac-r|^2 s.t. c_i >= 0 and 1-sum(c_i) >= 0
# construct objective function
@@ -179,7 +179,7 @@ def bootstrap(reads, smat_raw, B, test_c
# check if the calculated abundance is below the test abundance
fails[b,:] = corr[b,:] < test_c
- print
+ print()
p_values = np.mean(fails, axis=0)
abundances = np.mean(corr, axis=0)
variances = np.var(corr, axis=0)
--- a/core/tools.py
+++ b/core/tools.py
@@ -54,19 +54,19 @@ def read_names(filename):
def run_bowtie2(index, reads, out, param=""):
command = "bowtie2 -U {reads} -x {index} -S {samfile} {param} --local -M 0 ".format(reads=reads, index=index, samfile=out, param=param)
- print "Executing:",command
+ print("Executing:",command)
os.system(command)
return 1
def run_bowtie(index, reads, out, param=""):
command = "bowtie -S -p 2 -q -3 30 -v 2 {param} {index} {reads} > {samfile}".format(index=index, reads=reads, samfile=out, param=param)
- print "Executing:",command
+ print("Executing:",command)
os.system(command)
return 1
def run_bwa(index, reads, out, param=""):
command = "bwa aln {param} {index} {reads} > /tmp/res.sai".format(index=index, reads=reads, param=param)
- print "Executing:",command
+ print("Executing:",command)
os.system(command)
# convert the bwa output to SAM and remove temporary file
command_sam = "bwa samse %s /tmp/res.sai %s > %s && rm /tmp/res.sai"%(index,reads,out)
@@ -75,7 +75,7 @@ def run_bwa(index, reads, out, param="")
def run_bwasw(index, reads, out, param=""):
command = "bwa bwasw {param} {index} {reads} > {samfile}".format(index=index, reads=reads, samfile=out, param=param)
- print "Executing:",command
+ print("Executing:",command)
os.system(command)
return 1
@@ -116,7 +116,7 @@ How to add your custom mapper
def run_mason_illumina(ref, out):
command = "mason illumina -N 10000 -hi 0 -hs 0 -n 72 -sq -o %s %s"%(out,ref)
- print "Executing:",command
+ print("Executing:",command)
os.system(command)
# remove the needless SAM file
command = "rm %s.sam"%out
@@ -125,7 +125,7 @@ def run_mason_illumina(ref, out):
def run_dwgsim(ref, out):
command = "dwgsim -c 2 -1 80 -2 0 -r 0 -y 0 -e 0.002 -N 100000 -f TACG %s %s"%(ref, out)
- print "Executing:",command
+ print("Executing:",command)
os.system(command)
# remove all additional files and rename reads file
command = "mv {out}.bfast.fastq {out} && rm {out}.bwa.read1.fastq && rm {out}.bwa.read2.fastq && rm {out}.mutations.txt".format(out=out)
--- a/correct_abundances.py
+++ b/correct_abundances.py
@@ -55,10 +55,10 @@ def similarity_correction(names, smat_ra
p: p-value for the confidence, that the true abundance is above some threshold
"""
- print "\n--- GASiC correction\n"
+ print("\n--- GASiC correction\n")
# find out the number of reads
total = len( [1 for read in pysam.Samfile(sam_pattern%(names[0]), "r")] )
- print " found %i reads"%total
+ print(" found %i reads"%total)
# initialize some arrays
# mapping information; mapped[i,j]=1 if read j was successfully mapped to i.
@@ -67,7 +67,7 @@ def similarity_correction(names, smat_ra
# total number of successfully mapped reads per reference
num_reads = np.zeros( (len(names),) )
- print "\n--- Analyzing SAM files\n"
+ print("\n--- Analyzing SAM files\n")
# analyze the SAM files
for n_ind,nm in enumerate(names):
msg = " SAM file %i of %i"%(n_ind+1,len(names))
@@ -82,12 +82,12 @@ def similarity_correction(names, smat_ra
mapped[n_ind,:] = np.array([int(not rd.is_unmapped) for rd in sf])
num_reads[n_ind] = sum(mapped[n_ind,:])
- print "\n\n--- Bootstrapping abundances\n"
+ print("\n\n--- Bootstrapping abundances\n")
# run similarity correction step
p,corr,var = gasic.bootstrap(mapped, smat_raw, bootstrap_samples)
err = np.sqrt(var)
- print "\nDone estimating abundances\n"
+ print("\nDone estimating abundances\n")
return total,num_reads,corr,err,p
@@ -170,7 +170,7 @@ See the provided LICENSE file or source
out = "{name}\t{mapped}\t{corr}\t{error}\t{pval}\n"
ofile.write(out.format(name=nm,mapped=num_reads[n_ind],corr=corr[n_ind]*total,error=err[n_ind]*total,pval=p[n_ind]))
ofile.close()
- print "--- wrote results to", opt.out
+ print("--- wrote results to", opt.out)
else:
parser.print_help()
--- a/create_matrix.py
+++ b/create_matrix.py
@@ -63,14 +63,14 @@ def similarity_matrix_raw(names, ref_pat
os.makedirs(temp_dir)
n_seq = len(names)
- rng = range(n_seq)
+ rng = list(range(n_seq))
# construct arrays with real file names from pattern
ref_files = [ ref_pattern%nm for nm in names ] # filenames of reference sequences
index_files = [ index_pattern%nm for nm in names ] # filenames of mapper index files
sim_files = [ temp_dir+'/'+nm+'.fastq' for nm in names ] # filenames of simulated read files
- print "\n--- Simulating reads for each reference genome with %s\n"%simulator
+ print("\n--- Simulating reads for each reference genome with %s\n"%simulator)
# generate reads for every reference genome
for i in rng:
tools.run_simulator[simulator](ref_files[i], sim_files[i])
@@ -80,14 +80,14 @@ def similarity_matrix_raw(names, ref_pat
# and are stored in fastq format
num_reads = len( [ True for i in SeqIO.parse(sim_files[0],'fastq') ] )
- print "\n--- Mapping simulated reads to reference genomes with %s\n"%mapper
+ print("\n--- Mapping simulated reads to reference genomes with %s\n"%mapper)
# map the reads of every reference to all references
for i in rng:
for j in rng:
samfile = temp_dir+'/'+names[i]+'-'+names[j]+'.sam'
tools.run_mapper[mapper](index_files[j], sim_files[i], samfile)
- print "\n--- Parsing SAM files\n"
+ print("\n--- Parsing SAM files\n")
# parse SAM files
mapped_reads = np.zeros((n_seq,n_seq, num_reads))
for i in rng:
@@ -102,7 +102,7 @@ def similarity_matrix_raw(names, ref_pat
samhandle = pysam.Samfile(samfile, "r")
mapped_reads[i,j,:] = np.array( [int(not read.is_unmapped) for read in samhandle] )
- print "\n\nMatrix creation done\n"
+ print("\n\nMatrix creation done\n")
return mapped_reads
@@ -152,7 +152,7 @@ See the provided LICENSE file or source
# save the similarity matrix
np.save(opt.out, smat)
- print "Wrote similarity matrix to",opt.out
+ print("Wrote similarity matrix to",opt.out)
else:
parser.print_help()
sys.exit(1)
--- a/run_mappers.py
+++ b/run_mappers.py
@@ -76,9 +76,9 @@ READS: File containing the reads to be
for i in range(len(names)):
msg = "--- mapping reads with %s to %s"%(options.mapper, ref[i])
- print msg
+ print(msg)
tools.run_mapper[options.mapper](ref[i], reads, out[i])
- print "\nMapping done.\n"
+ print("\nMapping done.\n")
else:
parser.print_help()
sys.exit(1)
2to3.patch
use_seqan_mason.patch
fix_example_indentation.patch
......@@ -4,14 +4,21 @@ Description: since there is a name conflict with mason and the Debian package
mason is something else than the seqan tool, the code needs to be patched
to find /usr/lib/seqan/bin/mason
--- gasic.orig/core/tools.py
+++ gasic/core/tools.py
@@ -115,7 +115,7 @@
--- a/core/tools.py
+++ b/core/tools.py
@@ -115,7 +115,14 @@ How to add your custom mapper
# to create a seperate caller function for every scenario.
def run_mason_illumina(ref, out):
- command = "mason illumina -N 10000 -hi 0 -hs 0 -n 72 -sq -o %s %s"%(out,ref)
+ command = "/usr/lib/seqan/bin/mason illumina -N 10000 -hi 0 -hs 0 -n 72 -sq -o %s %s"%(out,ref)
print "Executing:",command
+ print("""Original command with mason from SeQan 1 was
+ mason illumina -N 10000 -hi 0 -hs 0 -n 72 -sq -o %s %s
+Please verify that the following call of mason_simulator fits your needs. If not
+please use
+ reportbug gasic
+to propose enhancements.
+""" % (out,ref))
+ command = "/usr/lib/seqan/bin/mason_simulator illumina --illumina-read-length 10000 -o %s %s"%(out,ref)
print("Executing:",command)
os.system(command)
# remove the needless SAM file
......@@ -3,7 +3,10 @@
# DH_VERBOSE := 1
%:
dh $@ --with python2
dh $@ --with python3
override_dh_python3:
dh_python3 --shebang=/usr/bin/python3
get-orig-source:
. debian/get-orig-source