Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · 43e9ade9
--- a/README.md
+++ b/README.md
 ### Important
-Users with versions dating prior to June 12 2013 please update. A nasty bug that caused the statistical part of mapDamage to use half of the data for estimation of the damage parameters, sorry for the inconvenience.
+Users with versions dating prior to June 12 2013 please update. A nasty bug that caused the statistical part of `mapDamage` to use half of the data for estimation of the damage parameters, sorry for the inconvenience.

 ### Introduction
 Complete documentation, instructions, examples, screenshots and FAQ are available at [this address](http://ginolhac.github.io/mapDamage/).

-[mapDamage2](http://geogenetics.ku.dk/publications/mapdamage2.0/) is a computational framework written in Python and R, which tracks and quantifies DNA damage patterns 
-among ancient DNA sequencing reads generated by Next-Generation Sequencing platforms. 
+[mapDamage2](http://geogenetics.ku.dk/publications/mapdamage2.0/) is a computational framework written in Python and R, which tracks and quantifies DNA damage patterns
+among ancient DNA sequencing reads generated by Next-Generation Sequencing platforms.

-Mapdamage is developed at the [Centre for GeoGenetics](http://geogenetics.ku.dk/) by the [Orlando Group ](http://geogenetics.ku.dk/research/research_groups/palaeomix_group/).
+`mapDamage` is developed at the [Centre for GeoGenetics](http://geogenetics.ku.dk/) by the [Orlando Group ](http://geogenetics.ku.dk/research/research_groups/palaeomix_group/).

 ### Web interface

 Anna Kostikova from [insideDNA](https://insidedna.me) implemented a web interface for mapDamage.  
-Users can adjust the cloud ressources in terms of RAM/CPU to perform their analysis. She wrote a [tutorial](https://insidedna.me/tutorials/view/Analysis-ancient-DNA-samples-using-mapDamage) explaining how to use this [web interface (sign up required)](https://insidedna.me/app#/tools/100648/)
+Users can adjust the cloud resources in terms of RAM/CPU to perform their analysis. She wrote a [tutorial](https://insidedna.me/tutorials/view/Analysis-ancient-DNA-samples-using-mapDamage) explaining how to use this [web interface (sign up required)](https://insidedna.me/app#/tools/100648/)


 ### Citation
@@ -31,5 +31,4 @@ http://bioinformatics.oxfordjournals.org/content/27/15/2153](http://bioinformati

 ### Contact
 Please report bugs and suggest possible improvements to Aurélien Ginolhac, Mikkel Schubert or Hákon Jónsson by email:
-aginolhac at snm.ku.dk, MSchubert at snm.ku.dk or jonsson.hakon at gmail.com.
-
+ginolhac at gmail.com, mikkelsch at gmail.com or jonsson.hakon at gmail.com.
--- a/bin/mapDamage
+++ b/bin/mapDamage
-#!/usr/bin/python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-from __future__ import print_function
-
 import logging
 import random
 import time
@@ -37,7 +35,7 @@ plot and quantify damage patterns from a SAM/BAM file
 :Date: November 2012
 :Type: tool
 :Input: SAM/BAM
-:Output: tabulated tables, pdf 
+:Output: tabulated tables, pdf
 """

 # check if pysam if available
@@ -45,7 +43,7 @@ MODULE = "pysam"
 URL = "http://code.google.com/p/pysam/"
 try:
    __import__(MODULE)
-except ImportError, e:
+except ImportError as e:
    sys.stderr.write("Error: Could not import required module '%s':\n\t- %s\n" % (MODULE, e))
    sys.stderr.write("       If module is not installed, please download from '%s'.\n" % URL)
    sys.stderr.write("       A local install may be performed using the following command:\n")
@@ -98,7 +96,7 @@ def _downsample_to_fixed_number(bamfile, options):
            if index >= downsample_to:
                continue
        sample[index] = record
-    return filter(None, sample)
+    return [_f for _f in sample if _f]


 def _read_bamfile(bamfile, options):
@@ -116,12 +114,12 @@ def _read_bamfile(bamfile, options):

 def _check_mm_option():
    """
-    As the user can override the system wide mapdamage modules with 
-    the --mapdamage-modules, it has to happen before option parsing 
+    As the user can override the system wide mapdamage modules with
+    the --mapdamage-modules, it has to happen before option parsing
    in mapdamage.parseoptions
    """
    path_to_mm = None
-    for nr,arg in zip(xrange(len(sys.argv)),sys.argv):
+    for nr,arg in zip(list(range(len(sys.argv))),sys.argv):
        if arg.startswith("--mapdamage-modules"):
            try:
                if "=" in arg:
@@ -130,7 +128,7 @@ def _check_mm_option():
                    path_to_mm = arg_p[1]
                else:
                    # the option is of the format --mapdamage-modules AAAA
-                    path_to_mm = sys.argv[nr+1] 
+                    path_to_mm = sys.argv[nr+1]
                break
            except IndexError as e:
                raise SystemExit("Must specify a path to --mapdamage-modules")
@@ -201,7 +199,7 @@ def main():
                mapdamage.composition.get_base_comp(options.ref,path_to_basecomp)
            mapdamage.rscript.run_stats(options)
            return 0
-    
+
    # fetch all references and associated lengths in nucleotides
    try:
        ref = pysam.Fastafile(options.ref)
@@ -230,7 +228,7 @@ def main():
        in_bam = pysam.Samfile(options.filename)


-    reflengths = dict(zip(in_bam.references, in_bam.lengths))
+    reflengths = dict(list(zip(in_bam.references, in_bam.lengths)))
    # check if references in SAM/BAM are the same in the fasta reference file
    fai_lengths = mapdamage.seq.read_fasta_index(options.ref + ".fai")
    if not fai_lengths:
@@ -322,7 +320,7 @@ def main():

        # compute base composition for genomic regions
        mapdamage.composition.count_ref_comp(read, chrom, before, after, dnacomp)
-        
+
        if options.fasta:
            mapdamage.seq.write_fasta(read, chrom, seq, refseq, min(coordinate), max(coordinate), before, after, fhfasta)

@@ -357,7 +355,7 @@ def main():
    if mapdamage.tables.check_table_and_warn_if_dmg_freq_is_low(options.folder):
        options.no_stats = True

-   
+
    # run the Bayesian estimation
    if not options.no_stats:
        # before running the Bayesian estimation get the base composition

--- a/debian/changelog
+++ b/debian/changelog
-mapdamage (2.0.9+dfsg-2) UNRELEASED; urgency=medium
+mapdamage (2.1.0+dfsg-1) UNRELEASED; urgency=medium

-  * Use 2to3 to port to Python3
+  * New upstream version
    Closes: #936989
  * debhelper-compat 12
  * Standards-Version: 4.4.0
@@ -8,7 +8,7 @@ mapdamage (2.0.9+dfsg-2) UNRELEASED; urgency=medium
  * buildsystem=pybuild
  * python3-pysam <!nocheck>

- -- Andreas Tille <tille@debian.org>  Tue, 03 Sep 2019 11:02:34 +0200
+ -- Andreas Tille <tille@debian.org>  Tue, 01 Oct 2019 17:13:12 +0200

 mapdamage (2.0.9+dfsg-1) unstable; urgency=medium


--- a/debian/patches/2to3.patch
+++ b/debian/patches/2to3.patch
-Description: Use 2to3 to port to Python3
-Bug-Debian: https://bugs.debian.org/936989
-Author: Andreas Tille <tille@debian.org>
-Last-Update: Tue, 03 Sep 2019 11:02:34 +0200
-
--- a/bin/mapDamage
-+++ b/bin/mapDamage
-@@ -1,7 +1,7 @@
-#!/usr/bin/python
-+#!/usr/bin/python3
- # -*- coding: utf-8 -*-
- 
-from __future__ import print_function
-+
- 
- import logging
- import random
-@@ -45,7 +45,7 @@ MODULE = "pysam"
- URL = "http://code.google.com/p/pysam/"
- try:
-     __import__(MODULE)
-except ImportError, e:
-+except ImportError as e:
-     sys.stderr.write("Error: Could not import required module '%s':\n\t- %s\n" % (MODULE, e))
-     sys.stderr.write("       If module is not installed, please download from '%s'.\n" % URL)
-     sys.stderr.write("       A local install may be performed using the following command:\n")
-@@ -98,7 +98,7 @@ def _downsample_to_fixed_number(bamfile,
-             if index >= downsample_to:
-                 continue
-         sample[index] = record
-    return filter(None, sample)
-+    return [_f for _f in sample if _f]
- 
- 
- def _read_bamfile(bamfile, options):
-@@ -121,7 +121,7 @@ def _check_mm_option():
-     in mapdamage.parseoptions
-     """
-     path_to_mm = None
-    for nr,arg in zip(xrange(len(sys.argv)),sys.argv):
-+    for nr,arg in zip(range(len(sys.argv)),sys.argv):
-         if arg.startswith("--mapdamage-modules"):
-             try:
-                 if "=" in arg:
-@@ -230,7 +230,7 @@ def main():
-         in_bam = pysam.Samfile(options.filename)
- 
- 
-    reflengths = dict(zip(in_bam.references, in_bam.lengths))
-+    reflengths = dict(list(zip(in_bam.references, in_bam.lengths)))
-     # check if references in SAM/BAM are the same in the fasta reference file
-     fai_lengths = mapdamage.seq.read_fasta_index(options.ref + ".fai")
-     if not fai_lengths:
--- a/mapdamage/__init__.py
-+++ b/mapdamage/__init__.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- 
- import mapdamage.parseoptions
- import mapdamage.seq
--- a/mapdamage/align.py
-+++ b/mapdamage/align.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- 
- import itertools
- import mapdamage
-@@ -84,7 +84,7 @@ def get_mis(read, seq, refseq, ref, leng
-   std = '-' if read.is_reverse else '+'
-   subtable = tab[ref][end][std]
- 
-  for (i, nt_seq, nt_ref) in itertools.izip(xrange(length), seq, refseq):
-+  for (i, nt_seq, nt_ref) in zip(range(length), seq, refseq):
-     if (nt_seq in "ACGT-") and (nt_ref in "ACGT-"):
-       if nt_ref != "-":
-         # record base composition in the reference, only A, C, G, T
--- a/mapdamage/composition.py
-+++ b/mapdamage/composition.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- 
- import mapdamage
- import itertools
-@@ -10,8 +10,8 @@ def count_ref_comp(read, chrom, before,
-   """ record basae composition in external genomic regions """
-   std = '-' if read.is_reverse else '+'
- 
-  _update_table(comp[chrom]['5p'][std], before, xrange(-len(before), 0))
-  _update_table(comp[chrom]['3p'][std], after,  xrange(1, len(after) + 1))
-+  _update_table(comp[chrom]['5p'][std], before, range(-len(before), 0))
-+  _update_table(comp[chrom]['3p'][std], after,  range(1, len(after) + 1))
- 
- 
- def count_read_comp(read, chrom, length, comp):
-@@ -20,12 +20,12 @@ def count_read_comp(read, chrom, length,
-   if read.is_reverse:
-     std, seq = '-', mapdamage.seq.revcomp(seq)
- 
-  _update_table(comp[chrom]['5p'][std], seq,           xrange(1, length + 1))
-  _update_table(comp[chrom]['3p'][std], reversed(seq), xrange(-1, - length - 1, -1))
-+  _update_table(comp[chrom]['5p'][std], seq,           range(1, length + 1))
-+  _update_table(comp[chrom]['3p'][std], reversed(seq), range(-1, - length - 1, -1))
- 
- 
- def _update_table(table, sequence, indices):
-  for (index, nt) in itertools.izip(indices, sequence):
-+  for (index, nt) in zip(indices, sequence):
-     if nt in table:
-       table[nt][index] += 1
- 
-@@ -47,7 +47,7 @@ def get_base_comp(filename,destination=F
-             bases["C"] = bases["C"] + int(tabs[3])
-             bases["G"] = bases["G"] + int(tabs[4])
-             bases["T"] = bases["T"] + int(tabs[5])
-    except (OSError, ValueError), error:
-+    except (OSError, ValueError) as error:
-         sys.stderr.write("Error: Seqtk failed: %s\n" % (error,))
-         sys.exit(1)
-     # get the base frequencies
--- a/mapdamage/parseoptions.py
-+++ b/mapdamage/parseoptions.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- 
- from optparse import OptionParser, OptionGroup, SUPPRESS_HELP
- import os
-@@ -235,12 +235,12 @@ def options():
- 
-     if os.path.isdir(options.folder):
-         if not options.quiet and not options.plot_only:
-            print("Warning, %s already exists" % options.folder)
-+            print(("Warning, %s already exists" % options.folder))
-         if options.plot_only:
-             if not file_exist(options.folder+"/dnacomp.txt") or not file_exist(options.folder+"/misincorporation.txt"):
-                 parser.error('folder %s is not a valid result folder' % options.folder)
-     else:
-        os.makedirs(options.folder, mode = 0750)
-+        os.makedirs(options.folder, mode = 0o750)
-         if options.plot_only or options.stats_only or options.rescale_only:
-             sys.stderr.write("Error, %s does not exist while plot/stats/rescale only was used\n" % options.folder)
-             return None
--- a/mapdamage/rescale.py
-+++ b/mapdamage/rescale.py
-@@ -31,14 +31,14 @@ def get_corr_prob(folder, rescale_length
-         fi_handle = csv.DictReader(open(full_path))
-         corr_prob = {}
-         for line in fi_handle:
-            if (corr_prob.has_key(line["Position"])):
-+            if (line["Position"] in corr_prob):
-                 sys.exit('This file has multiple position definitions %s, line %d: %s' % \
-                     (folder, fi_handle.line_num, corr_prob[line["Position"]]))
-             else:
-                 corr_prob[int(line["Position"])] = {'C.T':float(line["C.T"]), 'G.A':float(line["G.A"])}
- 
-         # Exclude probabilities for positions outside of user-specified region
-        for key in corr_prob.keys():
-+        for key in list(corr_prob.keys()):
-             if key < -rescale_length_3p or key > rescale_length_5p:
-                 corr_prob.pop(key)
- 
-@@ -75,13 +75,13 @@ def corr_this_base(corr_prob, nt_seq, nt
-     back_pos = pos-length-1 
-     # position from 3' end
- 
-    if corr_prob.has_key(pos):
-+    if pos in corr_prob:
-         p5_corr = corr_prob[pos][subs]
-         # correction from 5' end
-     else:
-         p5_corr = 0
- 
-    if corr_prob.has_key(back_pos):
-+    if back_pos in corr_prob:
-         p3_corr = corr_prob[back_pos][subs]
-         # correction from 3' end
-     else:
-@@ -104,7 +104,7 @@ def corr_this_base(corr_prob, nt_seq, nt
- 
- def initialize_subs():
-     """Initialize a substitution table, to track the expected substitution counts"""
-    per_qual = dict(zip(range(0,130),[0]*130))
-+    per_qual = dict(list(zip(list(range(0,130)),[0]*130)))
-     subs = {"CT-before":per_qual.copy(),\
-             "TC-before":per_qual.copy(),\
-             "GA-before":per_qual.copy(),\
-@@ -164,7 +164,7 @@ def qual_summary_subs(subs):
-             for qv in subs[i]:
-                 if qv >= lv :
-                     key = i+"-Q"+str(lv)
-                    if subs.has_key(key):
-+                    if key in subs:
-                         subs[key] += subs[i][qv]
-                     else:
-                         subs[key] = subs[i][qv]
-@@ -174,32 +174,32 @@ def print_subs(subs):
-     print("\tThe expected substition frequencies before and after scaling using the scaled qualities as probalities:")
-     if subs["C"]!=0:
-         # the special case of no substitutions
-        print("\tCT\t"+str(subs["CT-pvals_before"]/subs["C"])+"\t\t"+str(subs["CT-pvals"]/subs["C"]))
-+        print(("\tCT\t"+str(subs["CT-pvals_before"]/subs["C"])+"\t\t"+str(subs["CT-pvals"]/subs["C"])))
-     else: 
-         print("\tCT\tNA\t\tNA")
-     if subs["T"]!=0:
-        print("\tTC\t"+str(subs["TC-pvals"]/subs["T"])+"\t\t"+str(subs["TC-pvals"]/subs["T"]))
-+        print(("\tTC\t"+str(subs["TC-pvals"]/subs["T"])+"\t\t"+str(subs["TC-pvals"]/subs["T"])))
-     else:
-         print("\tTC\tNA\t\tNA")
-     if subs["G"]!=0:
-        print("\tGA\t"+str(subs["GA-pvals_before"]/subs["G"])+"\t\t"+str(subs["GA-pvals"]/subs["G"]))
-+        print(("\tGA\t"+str(subs["GA-pvals_before"]/subs["G"])+"\t\t"+str(subs["GA-pvals"]/subs["G"])))
-     else:
-         print("\tGA\tNA\t\tNA")
-     if subs["A"]!=0:
-        print("\tAG\t"+str(subs["AG-pvals"]/subs["A"])+"\t\t"+str(subs["AG-pvals"]/subs["A"]))
-+        print(("\tAG\t"+str(subs["AG-pvals"]/subs["A"])+"\t\t"+str(subs["AG-pvals"]/subs["A"])))
-     else:
-         print("\tAG\tNA\t\tNA")
-     print("\tQuality metrics before and after scaling")
-    print("\tCT-Q0 \t"+str(subs["CT-before-Q0"])+"\t\t"+str(subs["CT-after-Q0"]))
-    print("\tCT-Q10 \t"+str(subs["CT-before-Q10"])+"\t\t"+str(subs["CT-after-Q10"]))
-    print("\tCT-Q20 \t"+str(subs["CT-before-Q20"])+"\t\t"+str(subs["CT-after-Q20"]))
-    print("\tCT-Q30 \t"+str(subs["CT-before-Q30"])+"\t\t"+str(subs["CT-after-Q30"]))
-    print("\tCT-Q40 \t"+str(subs["CT-before-Q40"])+"\t\t"+str(subs["CT-after-Q40"]))
-    print("\tGA-Q0 \t"+str(subs["GA-before-Q0"])+"\t\t"+str(subs["GA-after-Q0"]))
-    print("\tGA-Q10 \t"+str(subs["GA-before-Q10"])+"\t\t"+str(subs["GA-after-Q10"]))
-    print("\tGA-Q20 \t"+str(subs["GA-before-Q20"])+"\t\t"+str(subs["GA-after-Q20"]))
-    print("\tGA-Q30 \t"+str(subs["GA-before-Q30"])+"\t\t"+str(subs["GA-after-Q30"]))
-    print("\tGA-Q40 \t"+str(subs["GA-before-Q40"])+"\t\t"+str(subs["GA-after-Q40"]))
-+    print(("\tCT-Q0 \t"+str(subs["CT-before-Q0"])+"\t\t"+str(subs["CT-after-Q0"])))
-+    print(("\tCT-Q10 \t"+str(subs["CT-before-Q10"])+"\t\t"+str(subs["CT-after-Q10"])))
-+    print(("\tCT-Q20 \t"+str(subs["CT-before-Q20"])+"\t\t"+str(subs["CT-after-Q20"])))
-+    print(("\tCT-Q30 \t"+str(subs["CT-before-Q30"])+"\t\t"+str(subs["CT-after-Q30"])))
-+    print(("\tCT-Q40 \t"+str(subs["CT-before-Q40"])+"\t\t"+str(subs["CT-after-Q40"])))
-+    print(("\tGA-Q0 \t"+str(subs["GA-before-Q0"])+"\t\t"+str(subs["GA-after-Q0"])))
-+    print(("\tGA-Q10 \t"+str(subs["GA-before-Q10"])+"\t\t"+str(subs["GA-after-Q10"])))
-+    print(("\tGA-Q20 \t"+str(subs["GA-before-Q20"])+"\t\t"+str(subs["GA-after-Q20"])))
-+    print(("\tGA-Q30 \t"+str(subs["GA-before-Q30"])+"\t\t"+str(subs["GA-after-Q30"])))
-+    print(("\tGA-Q40 \t"+str(subs["GA-before-Q40"])+"\t\t"+str(subs["GA-after-Q40"])))
-     
- 
- def rescale_qual_read(bam, read, ref, corr_prob,subs, debug = False,direction="both"):
-@@ -237,7 +237,7 @@ def rescale_qual_read(bam, read, ref, co
-     new_qual = [-100]*length_read
-     pos_on_read = 0
-     number_of_rescaled_bases = 0.0
-    for (i, nt_seq, nt_ref, nt_qual) in itertools.izip(xrange(length_align), seq, refseq, qual):
-+    for (i, nt_seq, nt_ref, nt_qual) in zip(range(length_align), seq, refseq, qual):
-         # rescale the quality according to the triplet position, 
-         # pair of the reference and the sequence
-         if ((nt_seq == "T" and nt_ref =="C") or (nt_seq == "A" and nt_ref =="G")):
--- a/mapdamage/rescale_test.py
-+++ b/mapdamage/rescale_test.py
-@@ -1,6 +1,9 @@
- import unittest
- import optparse
-import rescale
-+try:
-+    from . import rescale
-+except:
-+    from mapdamage import rescale
- import pysam
- import filecmp
- 
--- a/mapdamage/rscript.py
-+++ b/mapdamage/rscript.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- 
- import os
- import subprocess
-@@ -35,11 +35,11 @@ def plot(opt):
-   script = construct_path("mapDamage.R") 
-   call = ["Rscript", script, fcomp, title, opt.refplot, fmut, opt.readplot, \
-       opt.ymax, opt.folder, opt.title, __version__]
-  code = subprocess.call(map(str, call))
-+  code = subprocess.call(list(map(str, call)))
- 
-   if code == 0:
-     if not opt.quiet:
-      print("pdf %s generated" % title)
-+      print(("pdf %s generated" % title))
-     return 0
-   else:
-     print("Error: plotting with R failed")
-@@ -58,7 +58,7 @@ def opt_plots(opt):
-   script = construct_path("lengths.R") 
-   call = ["Rscript", script, flength, output, fmut, opt.length, \
-       opt.title, __version__, bool_to_int(opt.quiet)]
-  code = subprocess.call(map(str, call))
-+  code = subprocess.call(list(map(str, call)))
-   if code == 0:
-     return 0
-   else:
-@@ -90,11 +90,11 @@ def check_R_lib():
-     if len(missing_lib) > 1:
-         # Grammar Nazi has arrived
-         last_ele = missing_lib.pop()
-        print ("Missing the following R libraries '" + "', '".join(missing_lib) \
-            + "' and '" + last_ele + "'")
-+        print(("Missing the following R libraries '" + "', '".join(missing_lib) \
-+            + "' and '" + last_ele + "'"))
-         return 1
-     elif len(missing_lib) == 1:
-        print ("Missing the following R library "+missing_lib[0])
-+        print(("Missing the following R library "+missing_lib[0]))
-         return 1
-     else :
-         # No missing libraries
--- a/mapdamage/seq.py
-+++ b/mapdamage/seq.py
-@@ -1,9 +1,9 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- import sys
- import string
- 
- # from Martin Kircher, to complement DNA
-TABLE = string.maketrans('TGCAMRWSYKVHDBtgcamrwsykvhdb', \
-+TABLE = str.maketrans('TGCAMRWSYKVHDBtgcamrwsykvhdb', \
-     'ACGTKYWSRMBDHVacgtkywsrmbdhv')
- 
- LETTERS = ("A", "C", "G", "T")
--- a/mapdamage/tables.py
-+++ b/mapdamage/tables.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- 
- import os
- import collections
-@@ -16,7 +16,7 @@ def initialize_mut(ref, length):
-       for std in ('+','-'):
-         tab_std = tab_end[std] = {}
-         for mut in mapdamage.seq.HEADER:
-          tab_std[mut] = dict.fromkeys(xrange(length), 0)
-+          tab_std[mut] = dict.fromkeys(range(length), 0)
-   
-   return tab
- 
-@@ -26,8 +26,8 @@ def print_mut(mut, opt, out):
- 
- 
- def initialize_comp(ref, around, length):
-  keys = {"3p" : range(-length, 0) + range(1, around + 1),
-          "5p" : range(-around, 0) + range(1, length + 1)}
-+  keys = {"3p" : list(range(-length, 0)) + list(range(1, around + 1)),
-+          "5p" : list(range(-around, 0)) + list(range(1, length + 1))}
- 
-   tab = {}
-   for contig in ref:
-@@ -74,8 +74,8 @@ def check_table_and_warn_if_dmg_freq_is_
-   total = 0.0
-   for filename in ("5pCtoT_freq.txt", "3pGtoA_freq.txt"):
-     if not os.path.exists(folder+"/"+filename):
-      print("Error: Required table has not been created ('%s'), bayesian computation cannot be performed" \
-            % filename)
-+      print(("Error: Required table has not been created ('%s'), bayesian computation cannot be performed" \
-+            % filename))
-       return True
- 
-     with open(os.path.join(folder, filename)) as handle:
-@@ -85,12 +85,12 @@ def check_table_and_warn_if_dmg_freq_is_
-           total += float(freq[1])
-           break
-       else:
-        print("Error: Could not find pos = 1 in table '%s', bayesian computation cannot be performed" \
-              % filename)
-+        print(("Error: Could not find pos = 1 in table '%s', bayesian computation cannot be performed" \
-+              % filename))
-         return True
- 
-   if total < 0.01:
-    print("Warning: DNA damage levels are too low, the Bayesian computation should not be performed (%f < 0.01)\n" % total)
-+    print(("Warning: DNA damage levels are too low, the Bayesian computation should not be performed (%f < 0.01)\n" % total))
- 
-   return False
- 
-@@ -103,9 +103,9 @@ def _print_freq_table(table, columns, op
-   out.write("# Chr: reference from sam/bam header, End: from which termini of DNA sequences, Std: strand of reads\n")
-   out.write("Chr\tEnd\tStd\tPos\t%s\n" % ("\t".join(columns)))
- 
-  for (reference, ends) in sorted(table.iteritems()):
-    for (end, strands) in sorted(ends.iteritems()):
-      for (strand, subtable) in sorted(strands.iteritems()):
-+  for (reference, ends) in sorted(table.items()):
-+    for (end, strands) in sorted(ends.items()):
-+      for (strand, subtable) in sorted(strands.items()):
-         subtable["Total"] = {}
-         for index in sorted(subtable[columns[0]]):
-           subtable["Total"][index] = sum(subtable[letter][index] for letter in mapdamage.seq.LETTERS)
--- a/mapdamage/version.py
-+++ b/mapdamage/version.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- try:
-     from mapdamage._version import __version__
- except ImportError:
--- a/setup.py
-+++ b/setup.py
-@@ -1,4 +1,4 @@
-#!/usr/bin/env python
-+#!/usr/bin/python3
- # -*- coding: utf-8 -*-
- 
- from distutils.core import setup
-@@ -14,9 +14,9 @@ def setup_version():
-     try:
-         version = subprocess.check_output(("git", "describe", "--always", "--tags", "--dirty"))
-         with open(os.path.join("mapdamage", "_version.py"), "w") as handle:
-            handle.write("#!/usr/bin/env python\n")
-+            handle.write("#!/usr/bin/python3\n")
-             handle.write("__version__ = %r\n" % (version.strip(),))
-    except (subprocess.CalledProcessError, OSError), error:
-+    except (subprocess.CalledProcessError, OSError) as error:
-         raise SystemExit("Could not determine mapDamage version: %s" % (error,))
- 
- 
--- a/debian/patches/fix_quantile_na_rm.patch
+++ b/debian/patches/fix_quantile_na_rm.patch
@@ -4,11 +4,9 @@ Description: change calcSampleStats such as any NA and Nan's are
 Last-Update: 2017-03-28
 Bug: https://github.com/ginolhac/mapDamage/issues/18

-Index: mapdamage/mapdamage/Rscripts/stats/function.R
-===================================================================
--- mapdamage.orig/mapdamage/Rscripts/stats/function.R	2017-03-29 00:02:46.538470957 -0400
-+++ mapdamage/mapdamage/Rscripts/stats/function.R	2017-03-29 00:11:09.141025051 -0400
-@@ -595,8 +595,8 @@
+--- a/mapdamage/Rscripts/stats/function.R
+++ b/mapdamage/Rscripts/stats/function.R
+@@ -595,8 +595,8 @@ calcSampleStats <- function(da,X){
                       pos=da[,"Pos"],
                       mea=apply(X,1,mean),
                       med=apply(X,1,median),

--- a/debian/patches/series
+++ b/debian/patches/series
 use_debian_packaged_seqtk.patch
 fix_quantile_na_rm.patch
-2to3.patch
--- a/debian/patches/use_debian_packaged_seqtk.patch
+++ b/debian/patches/use_debian_packaged_seqtk.patch
@@ -27,23 +27,24 @@ Description: Use Debian packaged seqtk
 def setup_version():
     if not os.path.exists(".git"):
         # Release version, no .git folder
-@@ -40,16 +25,7 @@ class compileInstall(DistutilsInstall):
+@@ -40,17 +25,8 @@ class compileInstall(DistutilsInstall):
     def run(self):
         self.record=""
         setup_version()
 -        compile_seqtk()
         DistutilsInstall.run(self)
-        # fixing the permission problem of seqtk
+         # fixing the permission problem of seqtk
 -        files = self.get_outputs()
 -        for fi in files:
 -            if fi.endswith("seqtk/seqtk"):
-                os.chmod(fi,0755)
+-                os.chmod(fi,0o755)
+-
 -
 -
 -
- 
 
 setup(
+     cmdclass={'install': compileInstall},
 @@ -59,7 +35,7 @@ setup(
     author='Aurélien Ginolhac, Mikkel Schubert, Hákon Jónsson',
     author_email='MSchubert@snm.ku.dk, jonsson.hakon@gmail.com',
@@ -55,7 +56,7 @@ Description: Use Debian packaged seqtk
     license='LICENSE.txt',
 --- a/bin/mapDamage
 +++ b/bin/mapDamage
-@@ -151,7 +151,7 @@ def main():
+@@ -149,7 +149,7 @@ def main():
         sys.path.insert(0,path_to_mm)
     import mapdamage
 
@@ -66,12 +67,12 @@ Description: Use Debian packaged seqtk
                          "been intalled properly or current user does not\n"
 --- a/mapdamage/composition.py
 +++ b/mapdamage/composition.py
-@@ -35,7 +35,7 @@ def get_base_comp(filename,destination=F
+@@ -33,7 +33,7 @@ def get_base_comp(filename,destination=F
     Gets the basecomposition of all the sequences in filename
     and returns the value to destination if given.
     """
-    path_to_seqtk = mapdamage.rscript.construct_path("seqtk",folder="seqtk") 
-+    path_to_seqtk = "/usr/bin/seqtk" 
+-    path_to_seqtk = mapdamage.rscript.construct_path("seqtk",folder="seqtk")
+    path_to_seqtk = "/usr/bin/seqtk"
     bases = {"A":0,"C":0,"G":0,"T":0}
     alp = ["A","C","G","T"]
     try:
--- a/mapdamage/Rscripts/mapDamage.R
+++ b/mapdamage/Rscripts/mapDamage.R
@@ -33,20 +33,20 @@ draw.open.rect <- function(xleft, ybottom, xright, ytop, padding = 0) {
  } else {
    xpoints <- c(xright, xleft - padding, xleft - padding, xright)
  }
-    
+
  lines(xpoints, c(ytop, ytop, ybottom, ybottom), col = "darkgrey")
 }


 plot.base.composition <- function(tbl, base, color, around, ylabels.at = c(), xlabels = FALSE) {
  xcoords <- c(-around:-1, 1:around)
-  
+
  plot.axis <- function(yaxis.at) {
    axis(side = yaxis.at, labels = (yaxis.at == ylabels.at), line = 0, las = 2, cex.axis = 0.8)
    if ((yaxis.at == 2) && (yaxis.at == ylabels.at)) {
      mtext("Frequency", side = 2, line = 2.5, cex = 0.6)
    }
-  
+
    if (xlabels) {
      axis(side = 1, labels = xcoords, at = xcoords, las = 2, cex.axis = 0.6)
    } else {
@@ -64,7 +64,7 @@ plot.base.composition <- function(tbl, base, color, around, ylabels.at = c(), xl

    ycoords <- NULL
    for (i in xcoords) {
-      ycoords <- append(ycoords, mean(subtbl[(subtbl$Pos==i), base]/subtbl$Total[(subtbl$Pos==i)],na.rm=T)) 
+      ycoords <- append(ycoords, mean(subtbl[(subtbl$Pos==i), base]/subtbl$Total[(subtbl$Pos==i)],na.rm=T))
    }
    points(xcoords, ycoords, pch = 20, col = color, type = "b")
  }
@@ -80,7 +80,7 @@ plot.base.composition <- function(tbl, base, color, around, ylabels.at = c(), xl
  plot.frequencies("3p")
  plot.axis(4)
  draw.open.rect(-around, 0, 0, 0.5)
-  
+
  par(mar = c(1, 2, 1, 2))
 }

@@ -103,8 +103,8 @@ calculate.mutation.table <- function(filename, length)

 write.mutation.table <- function(tbl, end, mismatch, filename)
  {
-    columns <- c("pos", sprintf("5p%s", mismatch))
-    tbl[is.na(tbl)] <- 0 # Python doesn't like NAs  
+    columns <- c("pos", sprintf("%s%s", end, mismatch))
+    tbl[is.na(tbl)] <- 0 # Python doesn't like NAs
    write.table(tbl[tbl$End == end, c("Pos", mismatch)],
                row.names = FALSE, col.names = columns,
                sep = "\t", quote = FALSE,
@@ -117,12 +117,12 @@ plot.mutations <- function(end, axis.at, start.i, end.i, modifier) {
    subtable <- tbl[tbl$End == end, c("Pos", mismatches)]
    rates    <- rowSums(subtable) - subtable$Pos
    subtable <- aggregate(list(Rate = rates), list(Pos = subtable$Pos), sum)
-    
+
    lines(subtable$Pos * modifier, subtable$Rate,
          xlim = c(1, OPT.LENGTH), ylim = c(0, OPT.YMAX),
          col = color, lwd = width)
  }
-  
+
  plot(NA, xlim = c(start.i, end.i), ylim=c(0, OPT.YMAX), col="grey", lwd = 1, type = "l", xlab = "", ylab = "", axes = FALSE)
  axis(side = 1, labels = start.i:end.i, at = start.i:end.i, las = 2, cex.axis = 0.8)
  axis(side = axis.at, labels = TRUE, las = 2, cex.axis = 0.8)
@@ -130,11 +130,11 @@ plot.mutations <- function(end, axis.at, start.i, end.i, modifier) {
      mtext("Frequency", side = 2, line = 2.5, cex = 0.6)
  }
  draw.open.rect(start.i, OPT.YMAX, end.i, -0.01, padding = 0.5)
-  
+
  for (mismatch in MISMATCHES) {
    do.plot(mut, end, modifier, mismatch, "grey", 1)
  }
-  
+
  do.plot(mut, end, modifier, CLIPPING,   "orange", 1)
  do.plot(mut, end, modifier, DELETIONS,  "green", 1)
  do.plot(mut, end, modifier, INSERTIONS, "purple", 1)

--- a/mapdamage/Rscripts/stats/function.R
+++ b/mapdamage/Rscripts/stats/function.R
 #Various useful functions
 getPmat <- function(tmu,tv_ti_ratio,acgt){
-    #Returns the evolutionary substitution matrix  
+    #Returns the evolutionary substitution matrix
    if (sum(acgt>=1)!=0 || sum(acgt<=0)!=0){
        write("The ACGT frequencies must be in the range 0 to 1",stderr())
-        stop()       
+        stop()
    }
    if (all.equal(sum(acgt),1)!=TRUE){
        write("The ACGT frequencies do not sum to 1",stderr())
-        stop()       
+        stop()
    }
    if (tv_ti_ratio<=0){
        write("The transversion and transtition ratio cannot go under 0",stderr())
-        stop()       
+        stop()
    }
    #Returns the substitution probability matrix.
    if (identical(tv_ti_ratio,1) && identical(acgt,c(.25,.25,.25,.25))){
@@ -23,7 +23,7 @@ getPmat <- function(tmu,tv_ti_ratio,acgt){
        E  <- diag(exp(r$values))

        #      Q        The eigen vector change of basis
-        #  M  ->   M   
+        #  M  ->   M
        #
        #  ^       ^
        #  |B^-1   |B
@@ -32,8 +32,8 @@ getPmat <- function(tmu,tv_ti_ratio,acgt){
        out <- solve(a=t(B),b= E %*% t(B))#Little trick to avoid numerical difficulties
        rownames(out) <- c("A","C","G","T")
        colnames(out) <- c("A","C","G","T")
-        return(out) 
-    } 
+        return(out)
+    }
 }

 jukesCantorPmat <- function(tmu){
@@ -49,22 +49,22 @@ qmatHKY85 <- function(tmu,tv_ti,acgt){
    #               |    pi_a*tv_ti    pi_c              pi_g * tv_ti   sum_4         |
    #returns this matrix
    #This could be replaced with the analytical formulas for the substitutions probabilities.
-    
+
    Qmat <- matrix(rep(acgt,4),ncol=4,byrow=TRUE)
    diag(Qmat) <- 0

    #Adjusting the transversions versus transitions
-    #-  *  -  *    
-    #*  -  *  -    
-    #-  *  -  *    
-    #*  -  *  -    
+    #-  *  -  *
+    #*  -  *  -
+    #-  *  -  *
+    #*  -  *  -

    Qmat[1,2] <- tv_ti*Qmat[1,2]
    Qmat[1,4] <- tv_ti*Qmat[1,4]

    Qmat[2,1] <- tv_ti*Qmat[2,1]
    Qmat[2,3] <- tv_ti*Qmat[2,3]
-    
+
    Qmat[3,2] <- tv_ti*Qmat[3,2]
    Qmat[3,4] <- tv_ti*Qmat[3,4]

@@ -73,12 +73,12 @@ qmatHKY85 <- function(tmu,tv_ti,acgt){

    diag(Qmat) <- -apply(Qmat,1,sum)

-    Qmat <- tmu * Qmat 
+    Qmat <- tmu * Qmat
    return(Qmat)
 }

 metroDesc <- function(lpr,lol){
-    # the logic in the Metropolis-Hastings step 
+    # the logic in the Metropolis-Hastings step
    stopifnot(!is.na(lpr))
    stopifnot(!is.na(lol))
    if (log(runif(1))<lpr-lol){
@@ -89,7 +89,7 @@ metroDesc <- function(lpr,lol){
 }

 genOverHang <- function(la){
-    #Currently not used in the main code but could used to generate 
+    #Currently not used in the main code but could used to generate
    #overhangs like Philip
    r <- runif(1)
    i <- -1
@@ -143,7 +143,7 @@ seqProbVecLambda <- function(lambda,lambda_disp,m,fo_only=NA,re_only=NA){
    }
 }

-#The following is an MC simulation code to mimic the 
+#The following is an MC simulation code to mimic the
 #nick frequency part in the model from Philip
 seqProbVecNuWithLengths<- cxxfunction(methods::signature(
                                      I_la="numeric",
@@ -162,7 +162,7 @@ seqProbVecNuWithLengths<- cxxfunction(methods::signature(
                      double r = ((double) rand() / (RAND_MAX));
                      int i = -1;
                      double p = 0;
-                      double term = -500; 
+                      double term = -500;
                      while (p<r){
                          if (i==-1){
                              term = 1;
@@ -175,7 +175,7 @@ seqProbVecNuWithLengths<- cxxfunction(methods::signature(
                      return(i);
                  }
                  ',body= '
-              srand(time(0)); 
+              srand(time(0));
        Rcpp::NumericVector la(I_la);
        Rcpp::NumericVector la_disp(I_la_disp);
        Rcpp::NumericVector nu(I_nu);
@@ -199,7 +199,7 @@ seqProbVecNuWithLengths<- cxxfunction(methods::signature(
                double left_o_hang = genOverHang(la[0],la_disp[0]);
                double right_o_hang = genOverHang(la[0],la_disp[0]);
                double o_hang  = left_o_hang+right_o_hang;
-//              
+//
                if (o_hang>=les(i)){
                    //Single stranded sequence
                    for (int j = 0; j <les(i);j++){
@@ -211,8 +211,8 @@ seqProbVecNuWithLengths<- cxxfunction(methods::signature(
                        for (int j = 0; j <(les[i]-right_o_hang);j++){
                            output(j) = output(j)+1;
                        }
-                        //The right overhang is always G>A for the double stranded but 
-                        //Here we will make the assumption that the pattern is symmetric 
+                        //The right overhang is always G>A for the double stranded but
+                        //Here we will make the assumption that the pattern is symmetric
                        //for practical reasons We can\'t  do that ....
                    }else {
                        Rcpp::NumericVector sa = floor(runif(1,0,les[i]-o_hang))+left_o_hang;
@@ -258,7 +258,7 @@ pDam <- function(th,ded,des,la,nu,lin){
 }

 logLikFunOneBaseSlow <- function(Gen,S,Theta,deltad,deltas,laVec,nuVec,m,lin){
-    #Calculates the log likelihood using R, a C++ version is available  
+    #Calculates the log likelihood using R, a C++ version is available
    ll <- 0
    for (i in 1:length(laVec)){
        #Get the damage probabilities
@@ -268,7 +268,7 @@ logLikFunOneBaseSlow <- function(Gen,S,Theta,deltad,deltas,laVec,nuVec,m,lin){
    return(ll)
 }

-#The same logic as in logLikFunOneBaseSlow except using a compiled code 
+#The same logic as in logLikFunOneBaseSlow except using a compiled code
 #to do the hard work
 logLikFunOneBaseFast <- cxxfunction(methods::signature(
                                      I_Gen="numeric",
@@ -333,7 +333,7 @@ return(ret);


 logLikAll <- function(dat,Theta,deltad,deltas,laVec,nuVec,m){
-    #Calculates the logLikelihood for all the bases by calling 
+    #Calculates the logLikelihood for all the bases by calling
    # logLikFunOneBaseFast for each base
    if (deltad<0 || deltad>1 || deltas<0 || deltas>1  ){
        return(-Inf)
@@ -341,8 +341,8 @@ logLikAll <- function(dat,Theta,deltad,deltas,laVec,nuVec,m){
    #A,C,G and T

    deb <- 0
-    
-    Asub <- dat[,"A.C"]+dat[,"A.G"]+dat[,"A.T"]    
+
+    Asub <- dat[,"A.C"]+dat[,"A.G"]+dat[,"A.T"]
    ALL <- logLikFunOneBaseFast(dat[,"A"],cbind(dat[,"A"]-Asub,dat[,"A.C"],dat[,"A.G"],dat[,"A.T"]),Theta,deltad,deltas,laVec,nuVec,m,1)
    if (deb){
        ALLSlow <- logLikFunOneBaseSlow(dat[,"A"],cbind(dat[,"A"]-Asub,dat[,"A.C"],dat[,"A.G"],dat[,"A.T"]),Theta,deltad,deltas,laVec,nuVec,m,1)
@@ -355,7 +355,7 @@ logLikAll <- function(dat,Theta,deltad,deltas,laVec,nuVec,m){
        CLLSlow <- logLikFunOneBaseSlow(dat[,"C"],cbind(dat[,"C.A"],dat[,"C"]-Csub,dat[,"C.G"],dat[,"C.T"]),Theta,deltad,deltas,laVec,nuVec,m,2)
        stopifnot(all.equal(CLL,CLLSlow))
    }
-    
+

    Gsub <- dat[,"G.A"]+dat[,"G.C"]+dat[,"G.T"]
    GLL <- logLikFunOneBaseFast(dat[,"G"],cbind(dat[,"G.A"],dat[,"G.C"],dat[,"G"]-Gsub,dat[,"G.T"]),Theta,deltad,deltas,laVec,nuVec,m,3)
@@ -363,7 +363,7 @@ logLikAll <- function(dat,Theta,deltad,deltas,laVec,nuVec,m){
        GLLSlow <- logLikFunOneBaseSlow(dat[,"G"],cbind(dat[,"G.A"],dat[,"G.C"],dat[,"G"]-Gsub,dat[,"G.T"]),Theta,deltad,deltas,laVec,nuVec,m,3)
        stopifnot(all.equal(CLL,CLLSlow))
    }
-    
+
    Tsub <- dat[,"T.A"]+dat[,"T.C"]+dat[,"T.G"]
    TLL <- logLikFunOneBaseFast(dat[,"T"],cbind(dat[,"T.A"],dat[,"T.C"],dat[,"T.G"],dat[,"T"]-Tsub),Theta,deltad,deltas,laVec,nuVec,m,4)
    if (deb){
@@ -375,7 +375,7 @@ logLikAll <- function(dat,Theta,deltad,deltas,laVec,nuVec,m){


 getParams <- function(cp){
-    #Utility function nice to update the MCMC iterations matrix 
+    #Utility function nice to update the MCMC iterations matrix
    return(c(cp$Theta,cp$Rho,cp$DeltaD,cp$DeltaS,cp$Lambda,cp$LambdaRight,cp$LambdaDisp,cp$Nu))
 }

@@ -385,7 +385,7 @@ plotTrace<- function(dat,main,k=111){
 }

 plotEverything <- function(mcmcOut,hi=0,pl,thin=100){
-    #Plots the MCMC traceplot in the form of a running median and 
+    #Plots the MCMC traceplot in the form of a running median and
    #histogram of the MCMC iterations
    if (sum(c(cu_pa$same_overhangs==FALSE,
                    cu_pa$fix_disp==FALSE,
@@ -438,7 +438,7 @@ plotEverything <- function(mcmcOut,hi=0,pl,thin=100){
 }

 accRat <- function(da){
-    #A rough measure of the acceptance ratio 
+    #A rough measure of the acceptance ratio
    return(length(unique(da))/length(da))
 }

@@ -467,7 +467,7 @@ adjustPropVar <- function(mcmc,propVar){
 }

 runGibbs <- function(cu_pa,iter){
-    #Sampling over the conditional posterior distribution 
+    #Sampling over the conditional posterior distribution
    #for the parameters.
    esti <- matrix(nrow=iter,ncol=9)
    colnames(esti) <- c("Theta","Rho","DeltaD","DeltaS","Lambda","LambdaRight","LambdaDisp","Nu","LogLik")
@@ -492,7 +492,7 @@ runGibbs <- function(cu_pa,iter){
            #Update the nu parameter by via MC estimation
            cu_pa<-updateNu(cu_pa)
        }
-        esti[i,c(1:8)] <- getParams(cu_pa) 
+        esti[i,c(1:8)] <- getParams(cu_pa)
        esti[i,"LogLik"] <- logLikAll(cu_pa$dat,cu_pa$ThetaMat,cu_pa$DeltaD,cu_pa$DeltaS,cu_pa$laVec,cu_pa$nuVec,cu_pa$m)
        if (! (i %% 1000) && cu_pa$verbose){
            cat("MCMC-Iter\t",i,"\t",esti[i,"LogLik"],"\n")
@@ -504,7 +504,7 @@ runGibbs <- function(cu_pa,iter){

 simPredCheck <- function(da,output){
    #Simulates one draw from the posterior predictive distribution
-    #and the probability of a C>T substitution because of a cytosine 
+    #and the probability of a C>T substitution because of a cytosine
    #demnation.
    bases <- da[,c("A","C","G","T")]
    #Constructing the lambda vector
@@ -541,12 +541,12 @@ simPredCheck <- function(da,output){
                                         output$cu_pa$mLe,
                                         output$cu_pa$forward_only,
                                         output$cu_pa$nuSamples,
-                                         output$cu_pa$ds_protocol) 
+                                         output$cu_pa$ds_protocol)
        nuVec <- c(nuVec,rev(1-nuVec))
    }else {
        nuVec <- output$cu_pa$nuVec
    }
-    
+
    #Sample the other parameters
    des <- sample(output$out[,"DeltaS"],1)
    ded <- sample(output$out[,"DeltaD"],1)
@@ -562,10 +562,10 @@ simPredCheck <- function(da,output){
    subs <- matrix(NA,nrow=nrow(output$cu_pa$dat),ncol=4+length(coln))
    colnames(subs) <- c("A","C","G","T",coln)
    #
-    damProb <- rep(NA,nrow(output$cu_pa$dat)) 
-    damProbGA <- damProb 
+    damProb <- rep(NA,nrow(output$cu_pa$dat))
+    damProbGA <- damProb
    for (i in 1:nrow(output$cu_pa$dat)){
-        #Construct the site specific probabilities 
+        #Construct the site specific probabilities
        pct <- nuVec[i]*(laVec[i]*des+ded*(1-laVec[i]))
        pga <- (1-nuVec[i])*(laVec[i]*des+ded*(1-laVec[i]))
        pDamMat <- matrix(c(
@@ -574,11 +574,11 @@ simPredCheck <- function(da,output){
                         pga,0,1-pga,0,
                         0,0,0,1
                         ),nrow=4,byrow=TRUE)
-        ThetapDam <- pDamMat %*% pmat 
+        ThetapDam <- pDamMat %*% pmat
        #Calculate the probability C.T due to cytosine demanation
-        damProb[i] <- ptransCC*pct/(ptransCC*pct+ptransCT) 
-        #Do not forget the reverse complement 
-        damProbGA[i] <- ptransGG*pga/(ptransGG*pga+ptransGA) 
+        damProb[i] <- ptransCC*pct/(ptransCC*pct+ptransCT)
+        #Do not forget the reverse complement
+        damProbGA[i] <- ptransGG*pga/(ptransGG*pga+ptransGA)
        #Then draw from a multinomial distribution
        subs[i,c("A.C","A.G","A.T")] <- t(rmultinom(1,output$cu_pa$dat[i,"A"],ThetapDam[1,]))[-1]/output$cu_pa$dat[i,"A"]
        subs[i,c("C.A","C.G","C.T")] <- t(rmultinom(1,output$cu_pa$dat[i,"C"],ThetapDam[2,]))[-2]/output$cu_pa$dat[i,"C"]
@@ -602,7 +602,7 @@ calcSampleStats <- function(da,X){

 postPredCheck <- function(da,output,samples=10000){
    #Plots the 95% posterior predictive intervals with the data as lines.
-    #Returns the site specific probability of a C>T or G>A substitution 
+    #Returns the site specific probability of a C>T or G>A substitution
    #because of a cytosine demnation.
    CTs <- matrix(NA,nrow=nrow(da),ncol=samples)
    GAs <- matrix(NA,nrow=nrow(da),ncol=samples)
@@ -635,28 +635,28 @@ postPredCheck <- function(da,output,samples=10000){
    }
    CTsStats <- calcSampleStats(da,CTs)
    GAsStats <- calcSampleStats(da,GAs)
-    REsStats <- calcSampleStats(da,REs) 
+    REsStats <- calcSampleStats(da,REs)
    #Plotting the posterior predictive intervals
    p <- ggplot()+
-         geom_point(aes(x,mea,colour="C->T",aes_string="subs"),data=CTsStats)+
+         geom_point(aes(x,mea,colour="C->T"),data=CTsStats)+
         geom_point(aes(x,mea,colour="G->A"),data=GAsStats)+
         geom_point(aes(x,mea,colour="Others"),data=REsStats)+
-         geom_errorbar(aes(x=x,y=med,ymin=loCI,ymax=hiCI,color="C->T"),data=CTsStats)+
-         geom_errorbar(aes(x=x,y=med,ymin=loCI,ymax=hiCI,color="G->A"),data=GAsStats)+
-         geom_errorbar(aes(x=x,y=med,ymin=loCI,ymax=hiCI,color="Others"),data=REsStats)+
+         geom_errorbar(aes(x=x,ymin=loCI,ymax=hiCI,color="C->T"),data=CTsStats)+
+         geom_errorbar(aes(x=x,ymin=loCI,ymax=hiCI,color="G->A"),data=GAsStats)+
+         geom_errorbar(aes(x=x,ymin=loCI,ymax=hiCI,color="Others"),data=REsStats)+
         geom_line(aes(oneBased,C.T/C),color="red",data=data.frame(da))+
         geom_line(aes(oneBased,G.A/G),color="green",data=data.frame(da))+
         geom_line(aes(oneBased,((A.C+A.G+A.T)/A+(C.A+C.G)/C+(G.C+G.T)/G+(T.A+T.C+T.G)/T)/10),color="blue",data=data.frame(da))+
-         ylab("Substitution rate")+
-         xlab("Relative position")+
-         scale_x_continuous(breaks=bres,labels=labs)+
-         labs(colour = "Subs. type")+
-         ggtitle("Posterior prediction intervals")
+         labs(y = "Substitution rate",
+              x = "Relative position",
+              colour = "Subs. type",
+              title = "Posterior prediction intervals")+
+         scale_x_continuous(breaks=bres,labels=labs)
    if (output$cu_pa$use_bw_theme){
        p <- p+theme_bw()
    }
    plot(p)
-    #The correcting probabilities 
+    #The correcting probabilities
    coProbs <- cbind(da[,"Pos"],apply(C2TProbs,1,mean),apply(G2AProbs,1,mean))
    colnames(coProbs) <- c("Position","C.T","G.A")
    return(coProbs)
@@ -686,9 +686,8 @@ writeMCMC <- function(out,filename){
    qua <- apply(out$out[,parameters],2,quantile,seq(from=0,to=1,by=.025))
    acc <- apply(out$out[,parameters],2,accRat)
    summStat <- rbind(mea,std,acc,qua)
-    rownames(summStat)[1] <- "Mean" 
-    rownames(summStat)[2] <- "Std." 
-    rownames(summStat)[3] <- "Acceptance ratio" 
+    rownames(summStat)[1] <- "Mean"
+    rownames(summStat)[2] <- "Std."
+    rownames(summStat)[3] <- "Acceptance ratio"
    write.csv(summStat,paste(filename,"_summ_stat.csv",sep=""))
 }
-
--- a/mapdamage/Rscripts/stats/main.R
+++ b/mapdamage/Rscripts/stats/main.R
 #The main work flow of the package
 #Load the libraries
-suppressMessages(library(inline))  #Already checked the libraries 
-suppressMessages(library(ggplot2)) #thus ignoring any messages from them
-suppressMessages(library(Rcpp))
-suppressMessages(library(gam)) 
-suppressMessages(library(RcppGSL))
+suppressPackageStartupMessages(library(inline))  #Already checked the libraries
+suppressPackageStartupMessages(library(ggplot2)) #thus ignoring any messages from them
+suppressPackageStartupMessages(library(Rcpp))
+suppressPackageStartupMessages(library(gam))
+suppressPackageStartupMessages(library(RcppGSL))

-#Miscellaneous functions 
+#Miscellaneous functions
 source(paste(path_to_mapDamage_stats,"function.R",sep=""))

 #Prior and proposal distributions for the parameters
@@ -18,7 +18,7 @@ source(paste(path_to_mapDamage_stats,"postConditonal.R",sep=""))
 #functions for the grid search
 source(paste(path_to_mapDamage_stats,"start.R",sep=""))

-#functions for loading the data 
+#functions for loading the data
 source(paste(path_to_mapDamage_stats,"data.R",sep=""))


@@ -39,14 +39,14 @@ if (forward_only && reverse_only){
    stop()
 }

-fow_dat <-readMapDamData(path_to_dat) 
+fow_dat <-readMapDamData(path_to_dat)
 rev_dat <-readMapDamData(path_to_dat,forward=0)
 if (forward_only){
    #Taking only the forward part
-    dat <- fow_dat[1:sub_length,] 
+    dat <- fow_dat[1:sub_length,]
 }else if (reverse_only){
    #Taking only the reverse part
-    dat <- rev_dat[sub_length:1,] 
+    dat <- rev_dat[sub_length:1,]
 }else {
    #Using both ends
    dat <- joinFowAndRev(fow_dat,rev_dat,sub_length)
@@ -107,7 +107,7 @@ cu_pa$ThetaMat <- getPmat(cu_pa$Theta,cu_pa$Rho,cu_pa$acgt)

 #######################################################
 #
-#              Setting the lambda vector 
+#              Setting the lambda vector
 #
 #######################################################

@@ -127,17 +127,17 @@ if (!cu_pa$same_overhangs){

 #######################################################
 #
-#              Setting the nu vector 
+#              Setting the nu vector
 #
 #######################################################

 if (!cu_pa$ds_protocol & cu_pa$nuSamples!=0){
-    #Using the single stranded protocol with nu samples which makes 
+    #Using the single stranded protocol with nu samples which makes
    #no sense
    write("Silly to use the MC estimation for nu_i if using the single stranded protocol", stderr())
    stop()
 }else if (cu_pa$nuSamples!=0 & cu_pa$fix_nu) {
-    #Using the fixed nu parameter with nu samples which makes no sense 
+    #Using the fixed nu parameter with nu samples which makes no sense
    write("Silly to use the MC estimation for nu_i and use fix_nu ", stderr())
    stop()
 }else if (cu_pa$nuSamples!=0){
@@ -164,7 +164,7 @@ if (!cu_pa$ds_protocol & cu_pa$nuSamples!=0){
        cu_pa$nuVec <- c(rep(1,nrow(dat)/2),rep(0,nrow(dat)/2))
    }
 }else {
-    #This is for a non linear nick frequency, assumes the G>T and G>A are 
+    #This is for a non linear nick frequency, assumes the G>T and G>A are
    #mostly due to DNA damage patterns do not use for low damage datasets
    te<-(dat[,"C.T"]/dat[,"C"])/(dat[,"G.A"]/dat[,"G"]+dat[,"C.T"]/dat[,"C"])
    if (sum(is.na(te) )!=0 ){
@@ -177,7 +177,7 @@ if (!cu_pa$ds_protocol & cu_pa$nuSamples!=0){
            cu_pa$nuVec <- c(rep(1,nrow(dat)/2),rep(0,nrow(dat)/2))
        }
    } else {
-        #The substitutes seem to be okay estimate the nick frequency using GAM 
+        #The substitutes seem to be okay estimate the nick frequency using GAM
        if (cu_pa$forward_only || cu_pa$reverse_only){
            if (cu_pa$use_raw_nick_freq){
                #Use the frequency
@@ -202,14 +202,14 @@ if (!cu_pa$ds_protocol & cu_pa$nuSamples!=0){
 }
 #######################################################
 #
-#          Finding an "optimal" starting place 
+#          Finding an "optimal" starting place
 #
 #######################################################

 if (grid_iter!=0){
    #Start at random places and optimize the likelihood function
-    cu_pa <- gridSearch(cu_pa,grid_iter) 
-} 
+    cu_pa <- gridSearch(cu_pa,grid_iter)
+}

 #Calculate the log likelihood in the beginning
 if (cu_pa$same_overhangs){
@@ -267,11 +267,11 @@ if (out_file_base!=""){
    pdf(paste(out_file_base,"_MCMC_hist.pdf",sep=""))
    plotEverything(mcmcOut,hi=1)
    dev.off()
-    #Posterior predictive plot 
+    #Posterior predictive plot
    pdf(paste(out_file_base,"_MCMC_post_pred.pdf",sep=""))
    siteProb <- postPredCheck(dat,mcmcOut)
    dev.off()
-    #Write correcting probabilities 
+    #Write correcting probabilities
    write.csv(siteProb,paste(out_file_base,"_MCMC_correct_prob.csv",sep=""))
 } else {
    cat("Plotting\n")

--- a/mapdamage/__init__.py
+++ b/mapdamage/__init__.py
-#!/usr/bin/env python
+#!/usr/bin/env python3

 import mapdamage.parseoptions
 import mapdamage.seq

--- a/mapdamage/align.py
+++ b/mapdamage/align.py
-#!/usr/bin/env python
-
 import itertools
 import mapdamage

@@ -16,11 +14,11 @@ import mapdamage
 #X 8 sequence mismatch


-def get_coordinates(read):  
+def get_coordinates(read):
  """ return external coordinates of aligned read bases """
  fivep = read.aend if read.is_reverse else read.pos
  threep = read.pos if read.is_reverse else read.aend
-  
+
  return fivep, threep


@@ -41,9 +39,9 @@ def get_around(coord, chrom, reflengths, length, ref):


 def align(cigarlist, seq, ref):
-  """ insert gaps according to the cigar string 
-  deletion: gaps to be inserted into read sequences, 
-  insertions: gaps to be inserted into reference sequence """  
+  """ insert gaps according to the cigar string
+  deletion: gaps to be inserted into read sequences,
+  insertions: gaps to be inserted into reference sequence """
  lref = list(ref)
  for nbr, idx in parse_cigar(cigarlist, 1):
    lref[idx:idx] = ["-"] * nbr
@@ -56,9 +54,9 @@ def align(cigarlist, seq, ref):


 def align_with_qual(cigarlist, seq, qual, threshold, ref):
-  """ insert gaps according to the cigar string 
-  deletion: gaps to be inserted into read sequences and qualities, 
-  insertions: gaps to be inserted into reference sequence """  
+  """ insert gaps according to the cigar string
+  deletion: gaps to be inserted into read sequences and qualities,
+  insertions: gaps to be inserted into reference sequence """
  lref = list(ref)
  for nbr, idx in parse_cigar(cigarlist, 1):
    lref[idx:idx] = ["-"] * nbr
@@ -84,7 +82,7 @@ def get_mis(read, seq, refseq, ref, length, tab, end):
  std = '-' if read.is_reverse else '+'
  subtable = tab[ref][end][std]

-  for (i, nt_seq, nt_ref) in itertools.izip(xrange(length), seq, refseq):
+  for (i, nt_seq, nt_ref) in zip(range(length), seq, refseq):
    if (nt_seq in "ACGT-") and (nt_ref in "ACGT-"):
      if nt_ref != "-":
        # record base composition in the reference, only A, C, G, T
@@ -106,7 +104,7 @@ def parse_cigar(cigarlist, ope):
  for operation, length in cigarlist:
    if operation == ope:
        coordinate.append([length, tlength])
-    if operation in oplist: 
+    if operation in oplist:
        tlength += length
  return coordinate

@@ -127,8 +125,3 @@ def record_soft_clipping(read, tab, length):
      end = '5p' if read.is_reverse else '3p'

    update_table(end, strand, nbases)
-
-
-
-
-
--- a/mapdamage/composition.py
+++ b/mapdamage/composition.py
-#!/usr/bin/env python
-
 import mapdamage
 import itertools
 import csv
@@ -10,8 +8,8 @@ def count_ref_comp(read, chrom, before, after, comp):
  """ record basae composition in external genomic regions """
  std = '-' if read.is_reverse else '+'

-  _update_table(comp[chrom]['5p'][std], before, xrange(-len(before), 0))
-  _update_table(comp[chrom]['3p'][std], after,  xrange(1, len(after) + 1))
+  _update_table(comp[chrom]['5p'][std], before, range(-len(before), 0))
+  _update_table(comp[chrom]['3p'][std], after,  range(1, len(after) + 1))


 def count_read_comp(read, chrom, length, comp):
@@ -20,12 +18,12 @@ def count_read_comp(read, chrom, length, comp):
  if read.is_reverse:
    std, seq = '-', mapdamage.seq.revcomp(seq)

-  _update_table(comp[chrom]['5p'][std], seq,           xrange(1, length + 1))
-  _update_table(comp[chrom]['3p'][std], reversed(seq), xrange(-1, - length - 1, -1))
+  _update_table(comp[chrom]['5p'][std], seq,           range(1, length + 1))
+  _update_table(comp[chrom]['3p'][std], reversed(seq), range(-1, - length - 1, -1))


 def _update_table(table, sequence, indices):
-  for (index, nt) in itertools.izip(indices, sequence):
+  for (index, nt) in zip(indices, sequence):
    if nt in table:
      table[nt][index] += 1

@@ -35,7 +33,7 @@ def get_base_comp(filename,destination=False):
    Gets the basecomposition of all the sequences in filename
    and returns the value to destination if given.
    """
-    path_to_seqtk = mapdamage.rscript.construct_path("seqtk",folder="seqtk") 
+    path_to_seqtk = mapdamage.rscript.construct_path("seqtk",folder="seqtk")
    bases = {"A":0,"C":0,"G":0,"T":0}
    alp = ["A","C","G","T"]
    try:
@@ -47,7 +45,7 @@ def get_base_comp(filename,destination=False):
            bases["C"] = bases["C"] + int(tabs[3])
            bases["G"] = bases["G"] + int(tabs[4])
            bases["T"] = bases["T"] + int(tabs[5])
-    except (OSError, ValueError), error:
+    except (OSError, ValueError) as error:
        sys.stderr.write("Error: Seqtk failed: %s\n" % (error,))
        sys.exit(1)
    # get the base frequencies
@@ -75,7 +73,7 @@ def read_base_comp(filename):
        lp = li.split()
        if first_line:
            header = lp
-            first_line = False 
+            first_line = False
        else:
            body = lp
    bases = {}

--- a/mapdamage/parseoptions.py
+++ b/mapdamage/parseoptions.py
-#!/usr/bin/env python
-
 from optparse import OptionParser, OptionGroup, SUPPRESS_HELP
 import os
 import sys
@@ -38,7 +36,7 @@ def check_py_version():
        return None


-def options():  
+def options():
    parser = OptionParser("%prog [options] -i BAMfile -r reference.fasta\n\nUse option -h or --help for help", version=__version__, \
            epilog="report bugs to aginolhac@snm.ku.dk, MSchubert@snm.ku.dk or jonsson.hakon@gmail.com")

@@ -153,7 +151,7 @@ def options():
    if not check_py_version():
        return None

-    # if the user wants to check the R packages then do that before the option parsing 
+    # if the user wants to check the R packages then do that before the option parsing
    if options.check_R_packages:
        if check_R_lib():
            sys.exit(1)
@@ -224,7 +222,7 @@ def options():

    # check destination for rescaled bam
    if not options.rescale_out and (options.rescale or options.rescale_only):
-        # if there are mulitiple bam files to rescale then pick first one as 
+        # if there are mulitiple bam files to rescale then pick first one as
        # the name of the rescaled file
        if isinstance(options.filename,list):
            basename = os.path.basename(options.filename[0])
@@ -235,12 +233,12 @@ def options():

    if os.path.isdir(options.folder):
        if not options.quiet and not options.plot_only:
-            print("Warning, %s already exists" % options.folder)
+            print(("Warning, %s already exists" % options.folder))
        if options.plot_only:
            if not file_exist(options.folder+"/dnacomp.txt") or not file_exist(options.folder+"/misincorporation.txt"):
                parser.error('folder %s is not a valid result folder' % options.folder)
    else:
-        os.makedirs(options.folder, mode = 0750)
+        os.makedirs(options.folder, mode = 0o750)
        if options.plot_only or options.stats_only or options.rescale_only:
            sys.stderr.write("Error, %s does not exist while plot/stats/rescale only was used\n" % options.folder)
            return None
@@ -279,4 +277,3 @@ def options():


    return options
-
--- a/mapdamage/rescale.py
+++ b/mapdamage/rescale.py
@@ -31,14 +31,14 @@ def get_corr_prob(folder, rescale_length_5p, rescale_length_3p):
        fi_handle = csv.DictReader(open(full_path))
        corr_prob = {}
        for line in fi_handle:
-            if (corr_prob.has_key(line["Position"])):
+            if (line["Position"] in corr_prob):
                sys.exit('This file has multiple position definitions %s, line %d: %s' % \
                    (folder, fi_handle.line_num, corr_prob[line["Position"]]))
            else:
                corr_prob[int(line["Position"])] = {'C.T':float(line["C.T"]), 'G.A':float(line["G.A"])}

        # Exclude probabilities for positions outside of user-specified region
-        for key in corr_prob.keys():
+        for key in list(corr_prob.keys()):
            if key < -rescale_length_3p or key > rescale_length_5p:
                corr_prob.pop(key)

@@ -75,13 +75,13 @@ def corr_this_base(corr_prob, nt_seq, nt_ref, pos, length,direction="both"):
    back_pos = pos-length-1 
    # position from 3' end

-    if corr_prob.has_key(pos):
+    if pos in corr_prob:
        p5_corr = corr_prob[pos][subs]
        # correction from 5' end
    else:
        p5_corr = 0

-    if corr_prob.has_key(back_pos):
+    if back_pos in corr_prob:
        p3_corr = corr_prob[back_pos][subs]
        # correction from 3' end
    else:
@@ -104,7 +104,7 @@ def corr_this_base(corr_prob, nt_seq, nt_ref, pos, length,direction="both"):

 def initialize_subs():
    """Initialize a substitution table, to track the expected substitution counts"""
-    per_qual = dict(zip(range(0,130),[0]*130))
+    per_qual = dict(list(zip(list(range(0,130)),[0]*130)))
    subs = {"CT-before":per_qual.copy(),\
            "TC-before":per_qual.copy(),\
            "GA-before":per_qual.copy(),\
@@ -164,7 +164,7 @@ def qual_summary_subs(subs):
            for qv in subs[i]:
                if qv >= lv :
                    key = i+"-Q"+str(lv)
-                    if subs.has_key(key):
+                    if key in subs:
                        subs[key] += subs[i][qv]
                    else:
                        subs[key] = subs[i][qv]
@@ -174,32 +174,32 @@ def print_subs(subs):
    print("\tThe expected substition frequencies before and after scaling using the scaled qualities as probalities:")
    if subs["C"]!=0:
        # the special case of no substitutions
-        print("\tCT\t"+str(subs["CT-pvals_before"]/subs["C"])+"\t\t"+str(subs["CT-pvals"]/subs["C"]))
+        print(("\tCT\t"+str(subs["CT-pvals_before"]/subs["C"])+"\t\t"+str(subs["CT-pvals"]/subs["C"])))
    else: 
        print("\tCT\tNA\t\tNA")
    if subs["T"]!=0:
-        print("\tTC\t"+str(subs["TC-pvals"]/subs["T"])+"\t\t"+str(subs["TC-pvals"]/subs["T"]))
+        print(("\tTC\t"+str(subs["TC-pvals"]/subs["T"])+"\t\t"+str(subs["TC-pvals"]/subs["T"])))
    else:
        print("\tTC\tNA\t\tNA")
    if subs["G"]!=0:
-        print("\tGA\t"+str(subs["GA-pvals_before"]/subs["G"])+"\t\t"+str(subs["GA-pvals"]/subs["G"]))
+        print(("\tGA\t"+str(subs["GA-pvals_before"]/subs["G"])+"\t\t"+str(subs["GA-pvals"]/subs["G"])))
    else:
        print("\tGA\tNA\t\tNA")
    if subs["A"]!=0:
-        print("\tAG\t"+str(subs["AG-pvals"]/subs["A"])+"\t\t"+str(subs["AG-pvals"]/subs["A"]))
+        print(("\tAG\t"+str(subs["AG-pvals"]/subs["A"])+"\t\t"+str(subs["AG-pvals"]/subs["A"])))
    else:
        print("\tAG\tNA\t\tNA")
    print("\tQuality metrics before and after scaling")
-    print("\tCT-Q0 \t"+str(subs["CT-before-Q0"])+"\t\t"+str(subs["CT-after-Q0"]))
-    print("\tCT-Q10 \t"+str(subs["CT-before-Q10"])+"\t\t"+str(subs["CT-after-Q10"]))
-    print("\tCT-Q20 \t"+str(subs["CT-before-Q20"])+"\t\t"+str(subs["CT-after-Q20"]))
-    print("\tCT-Q30 \t"+str(subs["CT-before-Q30"])+"\t\t"+str(subs["CT-after-Q30"]))
-    print("\tCT-Q40 \t"+str(subs["CT-before-Q40"])+"\t\t"+str(subs["CT-after-Q40"]))
-    print("\tGA-Q0 \t"+str(subs["GA-before-Q0"])+"\t\t"+str(subs["GA-after-Q0"]))
-    print("\tGA-Q10 \t"+str(subs["GA-before-Q10"])+"\t\t"+str(subs["GA-after-Q10"]))
-    print("\tGA-Q20 \t"+str(subs["GA-before-Q20"])+"\t\t"+str(subs["GA-after-Q20"]))
-    print("\tGA-Q30 \t"+str(subs["GA-before-Q30"])+"\t\t"+str(subs["GA-after-Q30"]))
-    print("\tGA-Q40 \t"+str(subs["GA-before-Q40"])+"\t\t"+str(subs["GA-after-Q40"]))
+    print(("\tCT-Q0 \t"+str(subs["CT-before-Q0"])+"\t\t"+str(subs["CT-after-Q0"])))
+    print(("\tCT-Q10 \t"+str(subs["CT-before-Q10"])+"\t\t"+str(subs["CT-after-Q10"])))
+    print(("\tCT-Q20 \t"+str(subs["CT-before-Q20"])+"\t\t"+str(subs["CT-after-Q20"])))
+    print(("\tCT-Q30 \t"+str(subs["CT-before-Q30"])+"\t\t"+str(subs["CT-after-Q30"])))
+    print(("\tCT-Q40 \t"+str(subs["CT-before-Q40"])+"\t\t"+str(subs["CT-after-Q40"])))
+    print(("\tGA-Q0 \t"+str(subs["GA-before-Q0"])+"\t\t"+str(subs["GA-after-Q0"])))
+    print(("\tGA-Q10 \t"+str(subs["GA-before-Q10"])+"\t\t"+str(subs["GA-after-Q10"])))
+    print(("\tGA-Q20 \t"+str(subs["GA-before-Q20"])+"\t\t"+str(subs["GA-after-Q20"])))
+    print(("\tGA-Q30 \t"+str(subs["GA-before-Q30"])+"\t\t"+str(subs["GA-after-Q30"])))
+    print(("\tGA-Q40 \t"+str(subs["GA-before-Q40"])+"\t\t"+str(subs["GA-after-Q40"])))
    

 def rescale_qual_read(bam, read, ref, corr_prob,subs, debug = False,direction="both"):
@@ -237,7 +237,7 @@ def rescale_qual_read(bam, read, ref, corr_prob,subs, debug = False,direction="b
    new_qual = [-100]*length_read
    pos_on_read = 0
    number_of_rescaled_bases = 0.0
-    for (i, nt_seq, nt_ref, nt_qual) in itertools.izip(xrange(length_align), seq, refseq, qual):
+    for (i, nt_seq, nt_ref, nt_qual) in zip(range(length_align), seq, refseq, qual):
        # rescale the quality according to the triplet position, 
        # pair of the reference and the sequence
        if ((nt_seq == "T" and nt_ref =="C") or (nt_seq == "A" and nt_ref =="G")):

--- a/mapdamage/rescale_test.py
+++ b/mapdamage/rescale_test.py
 import unittest
 import optparse
-import rescale
+from . import rescale
 import pysam
 import filecmp


--- a/mapdamage/rscript.py
+++ b/mapdamage/rscript.py
-#!/usr/bin/env python
-
 import os
 import subprocess
 from subprocess import CalledProcessError, check_call
@@ -15,8 +13,8 @@ def construct_path(name,folder="Rscripts"):

 def plot(opt):
  """
-  Calls the R script to make the plots, takes in the arguments 
-  from parameter processing 
+  Calls the R script to make the plots, takes in the arguments
+  from parameter processing
  """
  # comp<-args[1]
  # pdfout<-args[2]
@@ -32,33 +30,33 @@ def plot(opt):
  fcomp = opt.folder+"/"+"dnacomp.txt"
  title = opt.folder+"/"+"Fragmisincorporation_plot.pdf"

-  script = construct_path("mapDamage.R") 
+  script = construct_path("mapDamage.R")
  call = ["Rscript", script, fcomp, title, opt.refplot, fmut, opt.readplot, \
      opt.ymax, opt.folder, opt.title, __version__]
-  code = subprocess.call(map(str, call))
+  code = subprocess.call(list(map(str, call)))

  if code == 0:
    if not opt.quiet:
-      print("pdf %s generated" % title)
+      print(("pdf %s generated" % title))
    return 0
  else:
    print("Error: plotting with R failed")
    return 1


-        
-def opt_plots(opt):       
+
+def opt_plots(opt):
  """ optional plots for length distribution
  and cumulative C>T mutations, per strand """
-  
+
  fmut = opt.folder+"/"+"misincorporation.txt"
  flength = opt.folder+"/"+"lgdistribution.txt"
  output = opt.folder+"/"+"Length_plot.pdf"

-  script = construct_path("lengths.R") 
+  script = construct_path("lengths.R")
  call = ["Rscript", script, flength, output, fmut, opt.length, \
      opt.title, __version__, bool_to_int(opt.quiet)]
-  code = subprocess.call(map(str, call))
+  code = subprocess.call(list(map(str, call)))
  if code == 0:
    return 0
  else:
@@ -77,7 +75,7 @@ def check_R_one_lib(name):

 def check_R_lib():
    """
-    Checks if the necessary R libraries are here, signal 
+    Checks if the necessary R libraries are here, signal
    otherwise
    """
    libs = ["inline", "ggplot2", "gam", "Rcpp", "RcppGSL"]
@@ -90,11 +88,11 @@ def check_R_lib():
    if len(missing_lib) > 1:
        # Grammar Nazi has arrived
        last_ele = missing_lib.pop()
-        print ("Missing the following R libraries '" + "', '".join(missing_lib) \
-            + "' and '" + last_ele + "'")
+        print(("Missing the following R libraries '" + "', '".join(missing_lib) \
+            + "' and '" + last_ele + "'"))
        return 1
    elif len(missing_lib) == 1:
-        print ("Missing the following R library "+missing_lib[0])
+        print(("Missing the following R library "+missing_lib[0]))
        return 1
    else :
        # No missing libraries

--- a/mapdamage/seq.py
+++ b/mapdamage/seq.py
-#!/usr/bin/env python
 import sys
 import string

 # from Martin Kircher, to complement DNA
-TABLE = string.maketrans('TGCAMRWSYKVHDBtgcamrwsykvhdb', \
+TABLE = str.maketrans('TGCAMRWSYKVHDBtgcamrwsykvhdb', \
    'ACGTKYWSRMBDHVacgtkywsrmbdhv')

 LETTERS = ("A", "C", "G", "T")
 MUTATIONS = ('G>A', 'C>T', 'A>G', 'T>C', 'A>C', 'A>T', 'C>G', 'C>A', 'T>G',
-             'T>A', 'G>C', 'G>T', 'A>-', 'T>-', 'C>-', 'G>-', '->A', '->T', 
+             'T>A', 'G>C', 'G>T', 'A>-', 'T>-', 'C>-', 'G>-', '->A', '->T',
             '->C', '->G', 'S')
 HEADER = LETTERS + ("Total", ) + MUTATIONS

@@ -33,11 +32,11 @@ def record_lg(read, coordinate, tab):
  """ record global length distribution
  don't record paired reads as they are normally not used for aDNA """
  std = '-' if read.is_reverse else '+'
-  
+
  length = (max(coordinate) - min(coordinate))
  if not read.is_paired:
    tab[std][length] = tab[std][length] + 1
-    
+
  return tab


@@ -47,7 +46,7 @@ def read_fasta_index(filename):
    sys.stderr.write("Error: %s\n" % msg)
    sys.stderr.write("       Filename: %s\n" % filename)
    sys.stderr.write("       Line:     %s\n" % repr(line))
-  
+
  fai = {}
  with open(filename, 'r') as handle:
    for line in handle:

--- a/mapdamage/tables.py
+++ b/mapdamage/tables.py
-#!/usr/bin/env python
-
 import os
 import collections

@@ -7,7 +5,7 @@ import mapdamage
 from mapdamage.version import __version__


-def initialize_mut(ref, length):  
+def initialize_mut(ref, length):
  tab = {}
  for contig in ref:
    tab_contig = tab[contig] = {}
@@ -16,8 +14,8 @@ def initialize_mut(ref, length):
      for std in ('+','-'):
        tab_std = tab_end[std] = {}
        for mut in mapdamage.seq.HEADER:
-          tab_std[mut] = dict.fromkeys(xrange(length), 0)
-  
+          tab_std[mut] = dict.fromkeys(range(length), 0)
+
  return tab


@@ -26,8 +24,8 @@ def print_mut(mut, opt, out):


 def initialize_comp(ref, around, length):
-  keys = {"3p" : range(-length, 0) + range(1, around + 1),
-          "5p" : range(-around, 0) + range(1, length + 1)}
+  keys = {"3p" : list(range(-length, 0)) + list(range(1, around + 1)),
+          "5p" : list(range(-around, 0)) + list(range(1, length + 1))}

  tab = {}
  for contig in ref:
@@ -52,7 +50,7 @@ def initialize_lg():
  for std in ('+','-'):
    tab[std] = collections.defaultdict(int)

-  return tab 
+  return tab


 def print_lg(tab, opt, out):
@@ -74,8 +72,8 @@ def check_table_and_warn_if_dmg_freq_is_low(folder):
  total = 0.0
  for filename in ("5pCtoT_freq.txt", "3pGtoA_freq.txt"):
    if not os.path.exists(folder+"/"+filename):
-      print("Error: Required table has not been created ('%s'), bayesian computation cannot be performed" \
-            % filename)
+      print(("Error: Required table has not been created ('%s'), bayesian computation cannot be performed" \
+            % filename))
      return True

    with open(os.path.join(folder, filename)) as handle:
@@ -85,12 +83,12 @@ def check_table_and_warn_if_dmg_freq_is_low(folder):
          total += float(freq[1])
          break
      else:
-        print("Error: Could not find pos = 1 in table '%s', bayesian computation cannot be performed" \
-              % filename)
+        print(("Error: Could not find pos = 1 in table '%s', bayesian computation cannot be performed" \
+              % filename))
        return True

  if total < 0.01:
-    print("Warning: DNA damage levels are too low, the Bayesian computation should not be performed (%f < 0.01)\n" % total)
+    print(("Warning: DNA damage levels are too low, the Bayesian computation should not be performed (%f < 0.01)\n" % total))

  return False

@@ -103,9 +101,9 @@ def _print_freq_table(table, columns, opt, out, offset = 0):
  out.write("# Chr: reference from sam/bam header, End: from which termini of DNA sequences, Std: strand of reads\n")
  out.write("Chr\tEnd\tStd\tPos\t%s\n" % ("\t".join(columns)))

-  for (reference, ends) in sorted(table.iteritems()):
-    for (end, strands) in sorted(ends.iteritems()):
-      for (strand, subtable) in sorted(strands.iteritems()):
+  for (reference, ends) in sorted(table.items()):
+    for (end, strands) in sorted(ends.items()):
+      for (strand, subtable) in sorted(strands.items()):
        subtable["Total"] = {}
        for index in sorted(subtable[columns[0]]):
          subtable["Total"][index] = sum(subtable[letter][index] for letter in mapdamage.seq.LETTERS)

--- a/mapdamage/version.py
+++ b/mapdamage/version.py
-#!/usr/bin/env python
+#!/usr/bin/env python3
 try:
    from mapdamage._version import __version__
 except ImportError:
-    __version__ = "2.0.8"
+    __version__ = "2.1.0"