Steffen Möller · Steffen Möller · Steffen Möller · 5b9f229a · 5b9f229a · 5b9f229a
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
+2018-09-10  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-map-probs:
+	Python3-ify last-map-probs
+	[83191e47259e] [tip]
+
+2018-09-07  Martin C. Frith  <Martin C. Frith>
+
+	* src/last-pair-probs.cc:
+	Make pair-probs almost work for ref rev strands
+	[da8c69023d0a]
+
+	* scripts/last-postmask:
+	postmask: fix asymmetric matrix & ref rev strand
+	[d4ecf378f967]
+
+2018-09-05  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-papers.txt, doc/last-train.txt, doc/lastal.txt,
+	doc/lastdb.txt:
+	Improve the documentation
+	[ffb4e02e04b0]
+
+	* scripts/fastq-interleave:
+	fastq-interleave: add /1 & /2 to identical names
+	[dd61bce162f0]
+
+	* doc/last-train.txt, scripts/last-train, test/last-train-test.out,
+	test/last-train-test.sh:
+	last-train: train substitutions from fastq!
+	[6de96c2afe47]
+
+2018-08-29  Martin C. Frith  <Martin C. Frith>
+
+	* src/lastal.cc:
+	Fix lastal -j7 quality-aware expected counts
+	[f70bcd86eb83]
+
+2018-08-24  Martin C. Frith  <Martin C. Frith>
+
+	* src/MultiSequence.cc, src/MultiSequence.hh,
+	src/MultiSequenceQual.cc, src/last.hh:
+	Read fastq faster
+	[41800ed89eb8]
+
+2018-08-21  Martin C. Frith  <Martin C. Frith>
+
+	* src/Alignment.cc, src/Centroid.cc, src/Centroid.hh, src/lastal.cc:
+	lastal -j7: quality-aware expected counts
+	[9e0b0d0c8438]
+
+2018-08-20  Martin C. Frith  <Martin C. Frith>
+
+	* src/Centroid.cc, src/QualityPssmMaker.cc, src/QualityPssmMaker.hh,
+	src/lastal.cc:
+	Refactor
+	[e454003e6a8a]
+
+2018-08-01  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-split.txt, src/split/last-split-main.cc, src/split/last-
+	split.cc, src/split/last-split.hh, test/last-split-test.out, test
+	/last-split-test.sh:
+	Add last-split --format option
+	[db2bd938c9b8]
+
+2018-07-30  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-postmask:
+	postmask: fix bug for spliced alignments
+	[e7da3afc2179]
+
+2018-07-17  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/maf-join:
+	Python3-ify maf-join
+	[764713f076f0]
+
+	* src/lastal.cc, test/last-test.out, test/last-test.sh:
+	Fix crash for bad score matrix
+	[318e420407d8]
+
+2018-06-25  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	Make last-dotplot really work with Python3
+	[82b69208c878]
+
+2018-05-28  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot, scripts/last-train, scripts/maf-convert,
+	scripts/maf-swap, test/last-test.sh, test/maf-swap-test.out, test
+	/maf-swap-test.sh:
+	Python3-ify: dotplot, train, convert, swap
+	[f3f1ec72542f]
+
+	* scripts/last-train, test/last-train-test.out:
+	Purely cosmetic change to last-train output
+	[05d2563d02e0]
+
+	* scripts/last-postmask, test/102.maf, test/last-postmask-test.out,
+	test/last-split-test.out:
+	Fix postmask header bug introduced in v938
+	[76cdd2203460]
+
 2018-05-14  Martin C. Frith  <Martin C. Frith>

 	* scripts/last-dotplot, scripts/last-postmask, test/102.maf, test
 	/last-postmask-test.out, test/last-postmask-test.sh, test/last-
 	split-test.out, test/last-split-test.sh:
 	postmask: fix bug for unusual alignment headers
-	[65969d4d464d] [tip]
+	[65969d4d464d]

 	* scripts/last-dotplot, scripts/last-map-probs, scripts/last-postmask,
 	scripts/last-train, scripts/maf-convert, scripts/maf-swap:

--- a/debian/changelog
+++ b/debian/changelog
+last-align (956-1) unstable; urgency=medium
+
+  * Team upload.
+
+  * New upstream version.
+  * Standards-Version: 4.2.1
+
+ -- Steffen Moeller <moeller@debian.org>  Tue, 02 Oct 2018 15:07:34 +0200
+
 last-align (938-1) unstable; urgency=medium

  * New upstream version

--- a/debian/control
+++ b/debian/control
@@ -8,7 +8,7 @@ Build-Depends: debhelper (>= 11~),
               help2man,
               python-pil,
               zlib1g-dev
-Standards-Version: 4.1.4
+Standards-Version: 4.2.1
 Vcs-Browser: https://salsa.debian.org/med-team/last-align
 Vcs-Git: https://salsa.debian.org/med-team/last-align.git
 Homepage: http://last.cbrc.jp/

--- a/debian/copyright
+++ b/debian/copyright
 Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: LAST
 Source: http://last.cbrc.jp/

 Files: *

--- a/doc/last-papers.html
+++ b/doc/last-papers.html
@@ -389,6 +389,11 @@ LAST-TRAIN</a>.  Hamada M, Ono Y, Asai K Frith MC.
 Bioinformatics. 2017 33(6):926-928.</p>
 <p>Describes last-train.</p>
 </li>
+<li><p class="first"><a class="reference external" href="https://ieeexplore.ieee.org/document/8288582/">A Simplified Description of Child Tables for Sequence Similarity
+Search</a>.  Frith MC, Shrestha A.  IEEE/ACM Trans Comput Biol
+Bioinform. 2018.</p>
+<p>Describes how LAST uses child tables.</p>
+</li>
 </ol>
 <div class="section" id="external-methods">
 <h2>External methods</h2>

--- a/doc/last-papers.txt
+++ b/doc/last-papers.txt
@@ -108,6 +108,14 @@ research to society.

    Describes last-train.

+13. `A Simplified Description of Child Tables for Sequence Similarity
+    Search`__.  Frith MC, Shrestha A.  IEEE/ACM Trans Comput Biol
+    Bioinform. 2018.
+
+    __ https://ieeexplore.ieee.org/document/8288582/
+
+    Describes how LAST uses child tables.
+
 External methods
 ----------------


--- a/doc/last-split.html
+++ b/doc/last-split.html
@@ -573,6 +573,11 @@ lastal -Q1 -D100 db q.fastq | last-split -c0 &gt; out.maf
 <kbd><span class="option">-h</span>, <span class="option">--help</span></kbd></td>
 <td>Show a help message, with default option values, and exit.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-f</span>, <span class="option">--format=<var>FMT</var></span></kbd></td>
+<td>Choose the output format: MAF+ (the default), or MAF (which
+omits the last-split &quot;p&quot; lines).  The format name is not
+case-sensitive.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-g</span>, <span class="option">--genome=<var>NAME</var></span></kbd></td>
 <td>Do spliced alignment, and read splice signals (GT, AG, etc)
 from the named genome.  NAME should be the name of a lastdb

--- a/doc/last-split.txt
+++ b/doc/last-split.txt
@@ -181,6 +181,11 @@ Options
  -h, --help
         Show a help message, with default option values, and exit.

+  -f, --format=FMT
+         Choose the output format: MAF+ (the default), or MAF (which
+         omits the last-split "p" lines).  The format name is not
+         case-sensitive.
+
  -g, --genome=NAME
         Do spliced alignment, and read splice signals (GT, AG, etc)
         from the named genome.  NAME should be the name of a lastdb

--- a/doc/last-train.html
+++ b/doc/last-train.html
@@ -318,8 +318,9 @@ table.field-list { border: thin solid green }
 <div class="document" id="last-train">
 <h1 class="title">last-train</h1>

-<p>This script tries to find suitable score parameters (substitution and
-gap scores) for aligning some given sequences.</p>
+<p>last-train finds the rates (probabilities) of insertion, deletion, and
+substitutions between two sets of sequences.  It thereby finds
+suitable substitution and gap scores for aligning them.</p>
 <p>It (probabilistically) aligns the sequences using some initial score
 parameters, then estimates better score parameters based on the
 alignments, and repeats this procedure until the parameters stop
@@ -374,8 +375,8 @@ e.g. score(A→G) = score(G→A).</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">--pid=<var>PID</var></span></kbd></td>
 <td>Ignore alignments with &gt; PID% identity.  This aims to
-optimize the parameters for low-similarity alignments,
-similarly to the BLOSUM matrices.</td></tr>
+optimize the parameters for low-similarity alignments
+(similarly to the BLOSUM matrices).</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">--postmask=<var>NUMBER</var></span></kbd></td>
 <td>By default, last-train ignores alignments of mostly-lowercase
@@ -447,9 +448,16 @@ alignments: they are described in more detail at <a class="reference external" h
 <td>Which query strand to use: 0=reverse, 1=forward, 2=both.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-S <var>NUMBER</var></span></kbd></td>
-<td>Score matrix applies to forward strand of: 0=reference,
-1=query.  This matters only if the scores lack
-reverse-complement symmetry.</td></tr>
+<td><p class="first">Specify how to use the substitution score matrix for
+reverse strands.  If you use <tt class="docutils literal"><span class="pre">--revsym</span></tt>, this makes no
+difference.  &quot;0&quot; means that the matrix is used as-is for
+all alignments.  &quot;1&quot; (the default) means that the matrix
+is used as-is for alignments of query sequence forward
+strands, and the complemented matrix is used for query
+sequence reverse strands.</p>
+<p class="last">This parameter is always written in last-train's output,
+so it will override lastal's default.</p>
+</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-C <var>COUNT</var></span></kbd></td>
 <td>Before extending gapped alignments, discard any gapless
@@ -471,10 +479,18 @@ position in each query.</td></tr>
 <td>Number of parallel threads.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-Q <var>NUMBER</var></span></kbd></td>
-<td>Query sequence format: 0=fasta, 1=fastq-sanger.
-<strong>Important:</strong> if you use option -Q, last-train will only
-train the gap scores, and leave the substitution scores
-at their initial values.</td></tr>
+<td><p class="first">Query sequence format: 0=fasta, 1=fastq-sanger.</p>
+<p>last-train assumes that fastq quality codes indicate
+substitution error probabilities, <em>not</em> insertion or
+deletion error probabilities.  If this assumption is
+dubious (e.g. for data with many insertion or deletion
+errors), it may be better to use fasta.</p>
+<p>For fastq, last-train finds the rates of substitutions
+not explained by the quality data (ideally, real
+substitutions as opposed to errors).</p>
+<p class="last">If specified, this parameter is written in last-train's
+output, so it will override lastal's default.</p>
+</td></tr>
 </tbody>
 </table>
 </blockquote>
@@ -492,6 +508,8 @@ too dissimilar.  If it fails to find any alignments, you could try
 <a class="reference external" href="last-tutorial.html#example-6-find-very-short-dna-alignments">reducing the alignment significance threshold</a> with
 option -D.</p>
 </li>
+<li><p class="first">last-train cannot handle DNA-versus-protein alignment.</p>
+</li>
 </ul>
 </div>
 </div>

--- a/doc/last-train.txt
+++ b/doc/last-train.txt
 last-train
 ==========

-This script tries to find suitable score parameters (substitution and
-gap scores) for aligning some given sequences.
+last-train finds the rates (probabilities) of insertion, deletion, and
+substitutions between two sets of sequences.  It thereby finds
+suitable substitution and gap scores for aligning them.

 It (probabilistically) aligns the sequences using some initial score
 parameters, then estimates better score parameters based on the
@@ -44,8 +45,8 @@ Training options
         Force the insertion costs to equal the deletion costs.
  --pid=PID
         Ignore alignments with > PID% identity.  This aims to
-         optimize the parameters for low-similarity alignments,
-         similarly to the BLOSUM matrices.
+         optimize the parameters for low-similarity alignments
+         (similarly to the BLOSUM matrices).
  --postmask=NUMBER
         By default, last-train ignores alignments of mostly-lowercase
         sequence (by using `last-postmask <last-postmask.html>`_).
@@ -81,9 +82,17 @@ Alignment options
  -E EG2     Maximum expected alignments per square giga.  (See `here
             <last-evalues.html>`_.)
  -s NUMBER  Which query strand to use: 0=reverse, 1=forward, 2=both.
-  -S NUMBER  Score matrix applies to forward strand of: 0=reference,
-             1=query.  This matters only if the scores lack
-             reverse-complement symmetry.
+  -S NUMBER  Specify how to use the substitution score matrix for
+             reverse strands.  If you use ``--revsym``, this makes no
+             difference.  "0" means that the matrix is used as-is for
+             all alignments.  "1" (the default) means that the matrix
+             is used as-is for alignments of query sequence forward
+             strands, and the complemented matrix is used for query
+             sequence reverse strands.
+
+             This parameter is always written in last-train's output,
+             so it will override lastal's default.
+
  -C COUNT   Before extending gapped alignments, discard any gapless
             alignment whose query range lies in COUNT other gapless
             alignments with higher score-per-length.  This aims to
@@ -94,9 +103,19 @@ Alignment options
             position in each query.
  -P COUNT   Number of parallel threads.
  -Q NUMBER  Query sequence format: 0=fasta, 1=fastq-sanger.
-             **Important:** if you use option -Q, last-train will only
-             train the gap scores, and leave the substitution scores
-             at their initial values.
+
+             last-train assumes that fastq quality codes indicate
+             substitution error probabilities, *not* insertion or
+             deletion error probabilities.  If this assumption is
+             dubious (e.g. for data with many insertion or deletion
+             errors), it may be better to use fasta.
+
+             For fastq, last-train finds the rates of substitutions
+             not explained by the quality data (ideally, real
+             substitutions as opposed to errors).
+
+             If specified, this parameter is written in last-train's
+             output, so it will override lastal's default.

 Bugs
 ----
@@ -110,3 +129,5 @@ Bugs
  `reducing the alignment significance threshold
  <last-tutorial.html#example-6-find-very-short-dna-alignments>`_ with
  option -D.
+
+* last-train cannot handle DNA-versus-protein alignment.
--- a/doc/lastal.html
+++ b/doc/lastal.html
@@ -614,13 +614,13 @@ value as was used for lastdb -W.</td></tr>
 1 means forward only, and 2 means both.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-S <var>NUMBER</var></span></kbd></td>
-<td>Specify which DNA strand the score matrix applies to.  This
-matters only for unusual matrices that lack strand symmetry
-(e.g. if the a:g score differs from the t:c score).  0 means
-that the matrix applies to the forward strand of the reference
-aligned to either strand of the query.  1 means that the matrix
-applies to either strand of the reference aligned to the forward
-strand of the query.</td></tr>
+<td>Specify how to use the substitution score matrix for reverse
+strands.  This matters only for unusual matrices that lack
+strand symmetry (e.g. if the a:g score differs from the t:c
+score).  &quot;0&quot; means that the matrix is used as-is for all
+alignments.  &quot;1&quot; means that the matrix is used as-is for
+alignments of query sequence forward strands, and the
+complemented matrix is used for query sequence reverse strands.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-K <var>LIMIT</var></span></kbd></td>
 <td>Omit any alignment whose query range lies in LIMIT or more other
@@ -735,9 +735,10 @@ lowercase is masked.</p>
 <li><p class="first">Mask them for gapless and gapped extensions.</p>
 </li>
 </ol>
-<p class="last">&quot;Mask&quot; means change their match/mismatch scores to min(unmasked
-score, 0).  This option does not affect treatment of lowercase
-for initial matches.</p>
+<p>&quot;Mask&quot; means change their match/mismatch scores to min(unmasked
+score, 0), a.k.a. <a class="reference external" href="https://doi.org/10.1371/journal.pone.0028819">gentle masking</a>.</p>
+<p class="last">This option does not affect treatment of lowercase for initial
+matches.</p>
 </td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-w <var>DISTANCE</var></span></kbd></td>

--- a/doc/lastal.txt
+++ b/doc/lastal.txt
@@ -262,13 +262,13 @@ Miscellaneous options
      1 means forward only, and 2 means both.

  -S NUMBER
-      Specify which DNA strand the score matrix applies to.  This
-      matters only for unusual matrices that lack strand symmetry
-      (e.g. if the a:g score differs from the t:c score).  0 means
-      that the matrix applies to the forward strand of the reference
-      aligned to either strand of the query.  1 means that the matrix
-      applies to either strand of the reference aligned to the forward
-      strand of the query.
+      Specify how to use the substitution score matrix for reverse
+      strands.  This matters only for unusual matrices that lack
+      strand symmetry (e.g. if the a:g score differs from the t:c
+      score).  "0" means that the matrix is used as-is for all
+      alignments.  "1" means that the matrix is used as-is for
+      alignments of query sequence forward strands, and the
+      complemented matrix is used for query sequence reverse strands.

  -K LIMIT
      Omit any alignment whose query range lies in LIMIT or more other
@@ -368,8 +368,11 @@ Miscellaneous options
      3. Mask them for gapless and gapped extensions.

      "Mask" means change their match/mismatch scores to min(unmasked
-      score, 0).  This option does not affect treatment of lowercase
-      for initial matches.
+      score, 0), a.k.a. `gentle masking
+      <https://doi.org/10.1371/journal.pone.0028819>`_.
+
+      This option does not affect treatment of lowercase for initial
+      matches.

  -w DISTANCE
      This option is a kludge to avoid catastrophic time and memory

--- a/doc/lastdb.html
+++ b/doc/lastdb.html
@@ -432,6 +432,11 @@ considered equivalent.</p>
 (SIZE + 1).</p>
 </td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-S <var>STRAND</var></span></kbd></td>
+<td>Specify which strand of the input sequences should be prepared
+for alignment: 0 means reverse only, 1 means forward only, and 2
+means both.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-s <var>BYTES</var></span></kbd></td>
 <td><p class="first">Limit memory usage, by splitting the output files into smaller
 &quot;volumes&quot; if necessary.  This will limit the memory usage of
@@ -562,6 +567,8 @@ bytes.</p>
 </li>
 <li><p class="first">-s: does not change the total size, but splits it into volumes.</p>
 </li>
+<li><p class="first">-S2: doubles the size of everything.</p>
+</li>
 </ul>
 </div>
 <div class="section" id="limitations">

--- a/doc/lastdb.txt
+++ b/doc/lastdb.txt
@@ -98,6 +98,11 @@ Advanced Options
      The fraction of positions that are "minimum" is roughly: 2 /
      (SIZE + 1).

+  -S STRAND
+      Specify which strand of the input sequences should be prepared
+      for alignment: 0 means reverse only, 1 means forward only, and 2
+      means both.
+
  -s BYTES      
      Limit memory usage, by splitting the output files into smaller
      "volumes" if necessary.  This will limit the memory usage of
@@ -228,6 +233,8 @@ This is modified by several options.

 * -s: does not change the total size, but splits it into volumes.

+* -S2: doubles the size of everything.
+
 Limitations
 -----------


--- a/scripts/fastq-interleave
+++ b/scripts/fastq-interleave
@@ -5,11 +5,18 @@ test $# = 2 || {
 Usage: $0 x.fastq y.fastq
 or:    $0 x.fastq.gz y.fastq.gz

-Read 2 fastq files, and write them interleaved.
-Assumes 1 fastq per 4 lines, i.e. no line wrapping.
+Read 2 fastq files, and write them interleaved.  Keep just the first
+word of header lines, and append "/1" and "/2" if they are otherwise
+identical.  Assumes 1 fastq per 4 lines, i.e. no line wrapping.
 EOF
    exit
 }

-paste <(gzip -cdf "$1" | paste - - - -) <(gzip -cdf "$2" | paste - - - -) |
-    tr '\t' '\n'
+fastqTab () {
+    gzip -cdf "$@" |
+	sed -e 's/^@  */@/' -e 's/ .*//' |
+	paste - - - -
+}
+
+paste <(fastqTab "$1") <(fastqTab "$2") |
+    awk '$1 == $5 {$1 = $1 "/1"; $5 = $5 "/2"} $1 = $1' OFS="\n"
--- a/scripts/last-dotplot
+++ b/scripts/last-dotplot
@@ -239,12 +239,12 @@ def mergedRanges(ranges):
    yield oldBeg, maxEnd

 def mergedRangesPerSeq(coverDict):
-    for k, v in coverDict.iteritems():
+    for k, v in coverDict.items():
        v.sort()
        yield k, list(mergedRanges(v))

 def coveredLength(mergedCoverDict):
-    return sum(sum(e - b for b, e in v) for v in mergedCoverDict.itervalues())
+    return sum(sum(e - b for b, e in v) for v in mergedCoverDict.values())

 def trimmed(seqRanges, coverDict, minAlignedBases, maxGapFrac, endPad, midPad):
    maxEndGapFrac, maxMidGapFrac = twoValuesFromOption(maxGapFrac, ",")
@@ -713,7 +713,8 @@ def getFont(opts):
    fileNames = []
    try:
        x = ["fc-match", "-f%{file}", "arial"]
-        p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p = subprocess.Popen(x, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             universal_newlines=True)
        out, err = p.communicate()
        fileNames.append(out)
    except OSError as e:

--- a/scripts/last-map-probs
+++ b/scripts/last-map-probs
@@ -7,7 +7,17 @@
 # probabilities make the risky assumption that one of the alignments
 # reported for each query is correct.

-import sys, os, fileinput, math, optparse, signal
+from __future__ import print_function
+
+import gzip
+import sys, os, math, optparse, signal
+
+def myOpen(fileName):
+    if fileName == "-":
+        return sys.stdin
+    if fileName.endswith(".gz"):
+        return gzip.open(fileName)
+    return open(fileName)

 def logsum(numbers):
    """Adds numbers, in log space, to avoid overflow."""
@@ -42,7 +52,7 @@ def scoreTotals(queryNames, scores, temperature):
    queryLists = {}
    for n, s in zip(queryNames, scores):
        queryLists.setdefault(n, []).append(s / temperature)
-    return dict((k, logsum(v)) for k, v in queryLists.iteritems())
+    return dict((k, logsum(queryLists[k])) for k in queryLists)

 def writeOneBatch(lines, queryNames, scores, denominators, opts, temperature):
    isWanted = True
@@ -64,7 +74,7 @@ def writeOneBatch(lines, queryNames, scores, denominators, opts, temperature):
            if s < opts.score or p > opts.mismap: continue
            newLineEnd = "\tmismap=%.3g\n" % p
            line = line.rstrip() + newLineEnd
-        if isWanted: print line,
+        if isWanted: print(line, end="")
        if line.isspace(): isWanted = True  # reset at end of maf paragraph

 def doOneBatch(lines, opts, temperature):
@@ -77,7 +87,7 @@ def readHeaderOrDie(lines):
    e = -1
    for line in lines:
        if line.startswith("#") or line.isspace():
-            print line,
+            print(line, end="")
            for i in line.split():
                if i.startswith("t="): t = float(i[2:])
                elif i.startswith("e="): e = int(i[2:])
@@ -86,8 +96,7 @@ def readHeaderOrDie(lines):
            raise Exception("I need a header with t= and e=")
    return t, e

-def lastMapProbs(opts, args):
-    f = fileinput.input(args)
+def doOneFile(opts, f):
    temperature, e = readHeaderOrDie(f)
    if opts.score < 0: opts.score = e + round(temperature * math.log(1000))
    lines = []
@@ -99,6 +108,13 @@ def lastMapProbs(opts, args):
        lines.append(line)
    doOneBatch(lines, opts, temperature)

+def lastMapProbs(opts, args):
+    if args:
+        for i in args:
+            doOneFile(opts, myOpen(i))
+    else:
+        doOneFile(opts, sys.stdin)
+
 if __name__ == "__main__":
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message


--- a/scripts/last-postmask
+++ b/scripts/last-postmask
@@ -102,6 +102,8 @@ def doOneFile(lines):
        if line[0] == "#":
            print(line, end="")
            fields = line.split()
+            nf = len(fields)
+            if not colHeads:
                for i in fields:
                    if i.startswith("a="): aDel = int(i[2:])
                    if i.startswith("b="): bDel = int(i[2:])
@@ -109,9 +111,9 @@ def doOneFile(lines):
                    if i.startswith("B="): bIns = int(i[2:])
                    if i.startswith("e="): minScore = int(i[2:])
                    if i.startswith("S="): strandParam = int(i[2:])
-            if not colHeads and len(fields) > 1 and max(map(len, fields)) == 1:
+                if nf > 1 and max(map(len, fields)) == 1:
                    colHeads = fields[1:]
-            elif len(fields) > 2 and len(fields[1]) == 1:
+            elif nf == len(colHeads) + 2 and len(fields[1]) == 1:
                rowHeads.append(fields[1])
                scoreMatrix.append([int(i) for i in fields[2:]])
        elif line.isspace():
@@ -127,10 +129,10 @@ def doOneFile(lines):
                    matrices = matrixPerStrand(rowHeads, colHeads,
                                               scoreMatrix, bDel, bIns)
                fields = line.split()
-                if len(seqs) == strandParam:
-                    strand = fields[4]
-                    matrix = matrices[strand == "-"]
                seqs.append(fields[6])
+                if len(seqs) == 2:
+                    isReverse = (fields[4] == "-")
+                    matrix = matrices[isReverse * strandParam]
    if seqs: printIfGood(maf, seqs, matrix, aDel, aIns, minScore)

 def lastPostmask(args):

--- a/scripts/last-train
+++ b/scripts/last-train
@@ -132,20 +132,6 @@ def scaleFromHeader(lines):
                return float(i[2:])
    raise Exception("couldn't read the scale")

-def scoreMatrixFromHeader(lines):
-    matrix = []
-    for line in lines:
-        w = line.split()
-        if len(w) > 2 and len(w[1]) == 1:
-            matrix.append(w[1:])
-        elif matrix:
-            break
-    return matrix
-
-def scaledMatrix(matrix, scaleIncrease):
-    return matrix[0:1] + [i[0:1] + [int(j) * scaleIncrease for j in i[1:]]
-                          for i in matrix[1:]]
-
 def countsFromLastOutput(lines, opts):
    matrix = []
    # use +1 pseudocounts as a kludge to mitigate numerical problems:
@@ -208,7 +194,7 @@ def writeMatrixBody(outFile, prefix, alphabet, matrix, formatString):
 def writeCountMatrix(outFile, matrix, prefix):
    alphabet = guessAlphabet(len(matrix))
    writeMatrixHead(outFile, prefix, alphabet, "%-14s")
-    writeMatrixBody(outFile, prefix, alphabet, matrix, "%-14s")
+    writeMatrixBody(outFile, prefix, alphabet, matrix, "%-14.12g")

 def writeProbMatrix(outFile, matrix, prefix):
    alphabet = guessAlphabet(len(matrix))
@@ -268,11 +254,11 @@ def gapProbsFromCounts(counts, opts):
        delExtendProb = (deletes - delOpens) / deletes
        insExtendProb = (inserts - insOpens) / inserts

-    print("# aligned letter pairs:", matches)
-    print("# deletes:", deletes)
-    print("# inserts:", inserts)
-    print("# delOpens:", delOpens)
-    print("# insOpens:", insOpens)
+    print("# aligned letter pairs: %.12g" % matches)
+    print("# deletes: %.12g" % deletes)
+    print("# inserts: %.12g" % inserts)
+    print("# delOpens: %.12g" % delOpens)
+    print("# insOpens: %.12g" % insOpens)
    print("# alignments:", alignments)
    print("# mean delete size: %g" % (deletes / delOpens))
    print("# mean insert size: %g" % (inserts / insOpens))
@@ -382,16 +368,15 @@ def doTraining(opts, args):
    x += args
    y = ["last-split", "-n"]
    z = ["last-postmask"]
-    p = subprocess.Popen(x, stdout=subprocess.PIPE)
-    q = subprocess.Popen(y, stdin=p.stdout, stdout=subprocess.PIPE)
+    p = subprocess.Popen(x, stdout=subprocess.PIPE, universal_newlines=True)
+    q = subprocess.Popen(y, stdin=p.stdout, stdout=subprocess.PIPE,
+                         universal_newlines=True)
    if opts.postmask:
-        q = subprocess.Popen(z, stdin=q.stdout, stdout=subprocess.PIPE)
+        q = subprocess.Popen(z, stdin=q.stdout, stdout=subprocess.PIPE,
+                             universal_newlines=True)
    lastalVersion = versionFromHeader(q.stdout)
    externalScale = scaleFromHeader(q.stdout)
    internalScale = externalScale * scaleIncrease
-    if opts.Q:
-        externalMatrix = scoreMatrixFromHeader(q.stdout)
-        internalMatrix = scaledMatrix(externalMatrix, scaleIncrease)
    oldParameters = []

    print("# lastal version:", lastalVersion)
@@ -401,17 +386,13 @@ def doTraining(opts, args):
    print()

    while True:
-        print("#", " ".join(x))
+        print("#", *x)
        print()
        sys.stdout.flush()
        matCounts, gapCounts = countsFromLastOutput(q.stdout, opts)
        gapProbs = gapProbsFromCounts(gapCounts, opts)
        gapCosts = gapCostsFromProbs(internalScale, gapProbs)
        printGapCosts(gapCosts)
-        if opts.Q:
-            if gapCosts in oldParameters: break
-            oldParameters.append(gapCosts)
-        else:
        matProbs = matProbsFromCounts(matCounts, opts)
        matScores = matScoresFromProbs(internalScale, matProbs)
        print("# score matrix "
@@ -425,20 +406,23 @@ def doTraining(opts, args):
        x = fixedLastalArgs(opts, lastalProgName)
        x.append("-p-")
        x += args
-        p = subprocess.Popen(x, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        p = subprocess.Popen(x, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                             universal_newlines=True)
        writeGapCosts(gapCosts, p.stdin)
        writeMatrixWithLetters(p.stdin, internalMatrix, "")
        p.stdin.close()
        # in python2.6, the next line must come after p.stdin.close()
-        q = subprocess.Popen(y, stdin=p.stdout, stdout=subprocess.PIPE)
+        q = subprocess.Popen(y, stdin=p.stdout, stdout=subprocess.PIPE,
+                             universal_newlines=True)
        if opts.postmask:
-            q = subprocess.Popen(z, stdin=q.stdout, stdout=subprocess.PIPE)
+            q = subprocess.Popen(z, stdin=q.stdout, stdout=subprocess.PIPE,
+                                 universal_newlines=True)

+    if opts.Q: writeLine(sys.stdout, "#last -Q", opts.Q)
    gapCosts = gapCostsFromProbs(externalScale, gapProbs)
    writeGapCosts(gapCosts, sys.stdout)
    if opts.s: writeLine(sys.stdout, "#last -s", opts.s)
    if opts.S: writeLine(sys.stdout, "#last -S", opts.S)
-    if not opts.Q:
    matScores = matScoresFromProbs(externalScale, matProbs)
    externalMatrix = matrixWithLetters(matScores)
    print("# score matrix "
@@ -451,7 +435,7 @@ def lastTrain(opts, args):
        refName = args[0]
        queryFiles = args[1:]
        try:
-            with tempfile.NamedTemporaryFile(delete=False) as f:
+            with tempfile.NamedTemporaryFile("w", delete=False) as f:
                getSeqSample(opts, queryFiles, f)
            doTraining(opts, [refName, f.name])
        finally:

--- a/scripts/maf-convert
+++ b/scripts/maf-convert
@@ -12,6 +12,11 @@ from itertools import *
 import gzip
 import math, optparse, os, signal, sys

+try:
+    from future_builtins import map, zip
+except ImportError:
+    pass
+
 def myOpen(fileName):
    if fileName == "-":
        return sys.stdin
@@ -40,7 +45,7 @@ def gapRunCount(row):
    return sum(k == "-" for k, v in groupby(row))

 def alignmentRowsFromColumns(columns):
-    return imap(''.join, izip(*columns))
+    return map(''.join, zip(*columns))

 def symbolSize(symbol, letterSize):
    if symbol == "\\": return 1
@@ -210,7 +215,7 @@ def writeTab(maf):

    letterSizes = [i[3] for i in sLines]
    rows = [i[6] for i in sLines]
-    alignmentColumns = izip(*rows)
+    alignmentColumns = zip(*rows)
    gapWord = ",".join(matchAndInsertSizes(alignmentColumns, letterSizes))
    outWords.append(gapWord)

@@ -242,7 +247,7 @@ def writeChain(maf):

    letterSizes = [i[3] for i in sLines]
    rows = [i[6] for i in sLines]
-    alignmentColumns = izip(*rows)
+    alignmentColumns = zip(*rows)
    size = "0"
    for i in matchAndInsertSizes(alignmentColumns, letterSizes):
        if ":" in i:
@@ -272,7 +277,7 @@ def pslBlocks(opts, mafs, outCounts):
        letterSizeB, begB, endB, rowB = fieldsB[3:7]

        size = 0
-        for x, y in izip(rowA.upper(), rowB.upper()):
+        for x, y in zip(rowA.upper(), rowB.upper()):
            if x == "-":
                if size:
                    yield size, begA, begB
@@ -502,18 +507,18 @@ def writeSam(readGroup, maf):

    pos = str(begA + 1)  # convert to 1-based coordinate

-    alignmentColumns = zip(rowA.upper(), rowB.upper())
+    alignmentColumns = list(zip(rowA.upper(), rowB.upper()))

    revBegB = seqLenB - endB
    cigar = "".join(cigarParts(begB, iter(alignmentColumns), revBegB))

-    seq = rowB.translate(None, "-")
+    seq = rowB.replace("-", "")

    qual = "*"
    if qLines:
        qFields = qLines[-1].split()
        if qFields[1] == seqNameB:
-            qual = ''.join(j for i, j in izip(rowB, qFields[2]) if i != "-")
+            qual = ''.join(j for i, j in zip(rowB, qFields[2]) if i != "-")

    # It's hard to get all the pair info, so this is very
    # incomplete, but hopefully good enough.
@@ -588,9 +593,9 @@ def blastChunker(sLines, lineSize, alignmentColumns):
    coords = [i[4] for i in sLines]
    for chunkCols in chunker(alignmentColumns, lineSize):
        chunkRows = list(alignmentRowsFromColumns(chunkCols))
-        begs = map(blastBegCoordinate, coords, strands, seqLens)
-        coords = map(nextCoordinate, coords, chunkRows, letterSizes)
-        ends = map(blastEndCoordinate, coords, strands, seqLens)
+        begs = list(map(blastBegCoordinate, coords, strands, seqLens))
+        coords = list(map(nextCoordinate, coords, chunkRows, letterSizes))
+        ends = list(map(blastEndCoordinate, coords, strands, seqLens))
        yield chunkCols, chunkRows, begs, ends

 def writeBlast(opts, maf, oldQueryName):
@@ -621,7 +626,7 @@ def writeBlast(opts, maf, oldQueryName):

    print(scoreLine)

-    alignmentColumns = zip(rowA, rowB)
+    alignmentColumns = list(zip(rowA, rowB))

    alnSize = len(alignmentColumns)
    matches = sum(x.upper() == y.upper() for x, y in alignmentColumns)
@@ -667,7 +672,7 @@ def writeBlastTab(opts, maf):
    seqNameA, begA, endA, rowA = blastDataFromMafFields(fieldsA)
    seqNameB, begB, endB, rowB = blastDataFromMafFields(fieldsB)

-    alignmentColumns = zip(rowA, rowB)
+    alignmentColumns = list(zip(rowA, rowB))
    alnSize = len(alignmentColumns)
    matches = sum(x == y for x, y in alignmentColumns)
    matchPercent = "%.2f" % (100.0 * matches / alnSize)
@@ -762,15 +767,15 @@ def writeHtml(opts, maf):

    if pLines:
        probRows = [i.split()[1] for i in pLines]
-        probCols = izip(*probRows)
-        classes = imap(probabilityClass, probCols)
+        probCols = zip(*probRows)
+        classes = map(probabilityClass, probCols)
    else:
        classes = repeat(None)

    seqNames = [i[0] for i in sLines]
    nameWidth = maxlen(seqNames)
    rows = [i[6] for i in sLines]
-    alignmentColumns = zip(*rows)
+    alignmentColumns = list(zip(*rows))

    print('<pre>')
    for chunk in blastChunker(sLines, opts.linesize, alignmentColumns):