Commits on Source (7)
canu (1.7.1+dfsg-1) unstable; urgency=medium
* Team upload.
* New upstream version
* debhelper 11
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.1.5
-- Andreas Tille <tille@debian.org> Tue, 17 Jul 2018 12:34:50 +0200
canu (1.7+dfsg-1) unstable; urgency=medium
* New upstream version
......
Source: canu
Section: science
Priority: optional
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Afif Elghraoui <afif@debian.org>
Build-Depends:
debhelper (>= 10),
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
libboost-dev,
libmeryl-dev,
# For File::Path
libfilesys-df-perl,
mhap (>= 2.1.3)
Standards-Version: 4.1.3
Standards-Version: 4.1.5
Vcs-Browser: https://salsa.debian.org/med-team/canu
Vcs-Git: https://salsa.debian.org/med-team/canu.git
Homepage: http://canu.readthedocs.org/en/latest/
Vcs-Git: https://anonscm.debian.org/git/debian-med/canu.git
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/canu.git
Package: canu
Architecture: any
Depends:
${shlibs:Depends},
Depends: ${shlibs:Depends},
${misc:Depends},
${perl:Depends},
libfilesys-df-perl,
mhap (>= 2.1.3),
gnuplot,
Suggests:
pbgenomicconsensus,
nanopolish,
gnuplot
Suggests: pbgenomicconsensus,
nanopolish
Description: single molecule sequence assembler for genomes
Canu is a fork of the Celera Assembler, designed for high-noise
single-molecule sequencing (such as the PacBio RS II or Oxford
......
......@@ -286,7 +286,7 @@ computeExponentialMovingAverage(TT alpha, TT ema, TT value) {
#if 0
template<typename TT>
class genericStatistics {
......@@ -338,12 +338,12 @@ public:
vector<uint64> &histogram(void) { // Returns pointer to private histogram data
finalizeData();
return(&_histogram);
return(_histogram);
};
vector<uint64> &Nstatistics(void) { // Returns pointer to private N data
finalizeData();
return(&_Nstatistics);
return(_Nstatistics);
};
void finalizeData(void) {
......@@ -375,7 +375,7 @@ private:
vector<uint64> _Nstatistics;
};
#endif
......@@ -422,12 +422,12 @@ public:
#if 0
vector<uint64> &histogram(void) { // Returns pointer to private histogram data
finalizeData();
return(&_histogram);
return(_histogram);
};
vector<uint64> &Nstatistics(void) { // Returns pointer to private N data
finalizeData();
return(&_Nstatistics);
return(_Nstatistics);
};
#endif
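The two hunks above are the same fix: these accessors are declared to return a reference, so returning the address of the member would not compile if the #if 0 blocks were ever re-enabled (and the '// Returns pointer' comments are now slightly stale). A minimal stand-alone sketch of the corrected pattern, for illustration only and not upstream code:
#include <cstdint>
#include <vector>

class sketchStats {
public:
  std::vector<uint64_t> &histogram(void) {
    //return(&_histogram);    // error: cannot convert 'std::vector<uint64_t>*' to 'std::vector<uint64_t>&'
    return(_histogram);       // correct: hand back a reference to the private data
  };

private:
  std::vector<uint64_t> _histogram;
};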
......
......@@ -130,6 +130,27 @@ BestOverlapGraph::removeHighErrorBestEdges(void) {
if (b5->readId() != 0) edgeStats.insert(erates[eratesLen++] = b5->erate());
if (b3->readId() != 0) edgeStats.insert(erates[eratesLen++] = b3->erate());
// If there are NO best edges, find the overlap with the most matches and use that.
if ((b5->readId() == 0) &&
(b3->readId() == 0)) {
uint32 no = 0;
BAToverlap *ovl = OC->getOverlaps(fi, no);
uint32 bestM = 0;
double bestE = 0.0;
for (uint32 oo=0; oo<no; oo++) {
double matches = (1 - ovl[oo].erate()) * RI->overlapLength(ovl[oo].a_iid, ovl[oo].b_iid, ovl[oo].a_hang, ovl[oo].b_hang);
if (bestM < matches) {
bestM = matches;
bestE = ovl[oo].erate();
}
}
if (no > 0)
edgeStats.insert(erates[eratesLen++] = bestE);
}
}
_mean = edgeStats.mean();
......@@ -357,6 +378,53 @@ BestOverlapGraph::removeSpurs(const char *prefix) {
// Mark zombie masters. A read whose overlaps are all containments in which it is the container,
// and whose ID is smaller than that of every read it shares a zero-hang (identical) overlap with,
// is a zombie master (e.g., of two identical reads 5 and 9, read 5 is the master). These get
// promoted to unitigs.
//
void
BestOverlapGraph::findZombies(const char *prefix) {
uint32 fiLimit = RI->numReads();
uint32 numThreads = omp_get_max_threads();
uint32 blockSize = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;
#pragma omp parallel for schedule(dynamic, blockSize)
for (uint32 fi=1; fi <= fiLimit; fi++) {
uint32 no = 0;
BAToverlap *ovl = OC->getOverlaps(fi, no);
uint32 nc = 0;
if (no == 0)
continue;
for (uint32 ii=0; ii<no; ii++, nc++) // If any overlap makes A not
if (ovl[ii].AisContainer() == false) // a container, it's not a zombie
break;
if (nc < no)
continue;
nc = UINT32_MAX;
for (uint32 ii=0; ii<no; ii++) // Find the smallest ID
if ((ovl[ii].a_hang == 0) && // with no hangs.
(ovl[ii].b_hang == 0) &&
(ovl[ii].b_iid < nc))
nc = ovl[ii].b_iid;
if (fi < nc) { // If we're smaller, we're a
#pragma omp critical (suspInsert) // Zombie Master!
{
writeLog("read %u is a zombie.\n", fi);
_zombie.insert(fi);
}
}
}
writeStatus("BestOverlapGraph()-- detected " F_SIZE_T " zombie reads.\n", _zombie.size());
}
void
BestOverlapGraph::findEdges(void) {
uint32 fiLimit = RI->numReads();
......@@ -513,6 +581,8 @@ BestOverlapGraph::BestOverlapGraph(double erateGraph,
findEdges();
}
findZombies(prefix);
reportBestEdges(prefix, logFileFlagSet(LOG_ALL_BEST_EDGES) ? "best.3.final" : "best");
// One more pass, to find any ambiguous best edges.
......
......@@ -183,6 +183,7 @@ private:
void removeSuspicious(const char *prefix);
void removeSpurs(const char *prefix);
void removeLopsidedEdges(const char *prefix);
void findZombies(const char *prefix);
void findEdges(void);
......@@ -240,6 +241,10 @@ public:
return(_suspicious.count(readid) > 0);
};
bool isZombie(const uint32 readid) {
return(_zombie.count(readid) > 0);
};
void reportEdgeStatistics(const char *prefix, const char *label);
void reportBestEdges(const char *prefix, const char *label);
......@@ -283,6 +288,7 @@ private:
set<uint32> _suspicious;
set<uint32> _singleton;
set<uint32> _spur;
set<uint32> _zombie;
map<uint32, BestOverlaps> _bestM;
map<uint32, BestScores> _scorM;
......
......@@ -133,17 +133,16 @@ findPrevRead(Unitig *tig,
uint32
dropDeadFirstRead(AssemblyGraph *AG,
Unitig *tig) {
Unitig *tig,
bool isForward) {
ufNode *fn = tig->firstRead();
ufNode *sn = findNextRead(tig, fn);
// No next read, keep fn in the tig.
if (sn == NULL) {
writeLog("dropDead()- read %8u no sn\n", fn->ident);
if (sn == NULL)
return(0);
}
// Over all edges from the first read, look for any edge to something else.
//
......@@ -153,23 +152,35 @@ dropDeadFirstRead(AssemblyGraph *AG,
// the tig. We assume that this is always the first read, which is OK, because the function name
// says so. Any edge to anywhere means the read is good and should be kept.
if (AG->getForward(fn->ident).size() == 0)
writeLog("dropDead()-- (%s) 1st read %8u has no edges\n", (isForward) ? "fwd" : "rev", fn->ident);
for (uint32 pp=0; pp<AG->getForward(fn->ident).size(); pp++) {
BestPlacement &pf = AG->getForward(fn->ident)[pp];
writeLog("dropDead()-- 1st read %8u %s pf %3u/%3u best5 %8u best3 %8u bestC %8u\n",
if (pf.bestC.b_iid > 0) {
writeLog("dropDead()-- (%s) 1st read %8u %s edge %4u/%-4u - contained in %u - confirmed\n",
(isForward) ? "fwd" : "rev",
fn->ident,
fn->position.isForward() ? "->" : "<-",
pp, AG->getForward(fn->ident).size(),
pf.best5.b_iid, pf.best3.b_iid, pf.bestC.b_iid);
if (pf.bestC.b_iid > 0) {
pf.bestC.b_iid);
return(0);
}
if (((fn->position.isForward() == true) && (pf.best5.b_iid != 0)) ||
((fn->position.isForward() == false) && (pf.best3.b_iid != 0)))
((fn->position.isForward() == false) && (pf.best3.b_iid != 0))) {
writeLog("dropDead()-- (%s) 1st read %8u %s edge %4u/%-4u - 5:%u 3:%u - confirmed\n",
(isForward) ? "fwd" : "rev",
fn->ident,
fn->position.isForward() ? "->" : "<-",
pp, AG->getForward(fn->ident).size(),
pf.best5.b_iid, pf.best3.b_iid);
return(0);
}
}
// But no edge means we need to check the second read. If it has an edge, then we infer the
// first read is bogus and should be removed. If it also has no edge (except to the first read,
......@@ -180,26 +191,41 @@ dropDeadFirstRead(AssemblyGraph *AG,
// first read. Well, and that if the second read has an edge we declare the first read to be
// junk. That's also a bit of a difference from the previous loop.
if (AG->getForward(sn->ident).size() == 0) {
writeLog("dropDead()-- (%s) 2nd read %8u has no edges - keep first\n", (isForward) ? "fwd" : "rev", sn->ident);
return(0);
}
for (uint32 pp=0; pp<AG->getForward(sn->ident).size(); pp++) {
BestPlacement &pf = AG->getForward(sn->ident)[pp];
writeLog("dropDead()-- 2nd read %8u %s pf %3u/%3u best5 %8u best3 %8u bestC %8u\n",
if ((pf.bestC.b_iid > 0) && (pf.bestC.b_iid != fn->ident)) {
writeLog("dropDead()-- (%s) 2nd read %8u %s edge %4u/%-4u - contained in %u - drop first\n",
(isForward) ? "fwd" : "rev",
sn->ident,
sn->position.isForward() ? "->" : "<-",
pp, AG->getForward(sn->ident).size(),
pf.best5.b_iid, pf.best3.b_iid, pf.bestC.b_iid);
if ((pf.bestC.b_iid > 0) && (pf.bestC.b_iid != fn->ident))
pf.bestC.b_iid);
return(fn->ident);
}
if (((sn->position.isForward() == true) && (pf.best5.b_iid != 0) && (pf.best5.b_iid != fn->ident)) ||
((sn->position.isForward() == false) && (pf.best3.b_iid != 0) && (pf.best3.b_iid != fn->ident)))
((sn->position.isForward() == false) && (pf.best3.b_iid != 0) && (pf.best3.b_iid != fn->ident))) {
writeLog("dropDead()-- (%s) 2nd read %8u %s edge %4u/%-4u - 5:%u 3:8u - drop first\n",
(isForward) ? "fwd" : "rev",
sn->ident,
sn->position.isForward() ? "->" : "<-",
pp, AG->getForward(sn->ident).size(),
pf.best5.b_iid, pf.best3.b_iid);
return(fn->ident);
}
}
// Otherwise, the second read had only edges to the first read, and we should keep the first
// read.
writeLog("dropDead()-- (%s) 2nd read %8u has no useful external edges - keep first\n", (isForward) ? "fwd" : "rev", sn->ident);
return(0);
}
......@@ -222,20 +248,32 @@ dropDeadEnds(AssemblyGraph *AG,
(tig->_isUnassembled == true))
continue;
uint32 fn = dropDeadFirstRead(AG, tig); // Decide if the first read is junk.
writeLog("\n");
writeLog("dropDead()-- testing tig %u length %u\n", ti, tig->getLength());
uint32 fn = dropDeadFirstRead(AG, tig, true); // Decide if the first read is junk.
//fprintf(stderr, "drop dead for tig %u (flipped)\n", ti);
tig->reverseComplement(); // Flip.
uint32 ln = dropDeadFirstRead(AG, tig); // Decide if the last (now first) read is junk.
uint32 ln = dropDeadFirstRead(AG, tig, false); // Decide if the last (now first) read is junk.
tig->reverseComplement(); // Flip back.
if ((fn == 0) && (ln == 0)) // Nothing to remove, just get out of here.
if ((fn == 0) && (ln == 0)) { // Nothing to remove, nothing to do.
writeLog("dropDead()-- both ends confirmed\n");
continue;
}
if (fn == ln) { // The same thing to remove, ignore it.
writeLog("dropDead()-- retaining spanning read %u\n", fn);
continue;
}
// At least one read needs to be kicked out. Make new tigs for everything.
char fnMsg[80] = {0}; Unitig *fnTig = NULL;
char nnMsg[80] = {0}; Unitig *nnTig = NULL; int32 nnOff = INT32_MAX;
char lnMsg[80] = {0}; Unitig *lnTig = NULL;
Unitig *fnTig = NULL;
Unitig *nnTig = NULL; int32 nnOff = INT32_MAX;
Unitig *lnTig = NULL;
if (fn > 0)
fnTig = tigs.newUnitig(false);
......@@ -256,32 +294,26 @@ dropDeadEnds(AssemblyGraph *AG,
// Move reads to their new unitig.
strcpy(fnMsg, " ");
strcpy(nnMsg, " ");
strcpy(lnMsg, "");
for (uint32 cc=0, tt=0; tt<tig->ufpath.size(); tt++) {
ufNode &read = tig->ufpath[tt];
if (read.ident == fn) {
sprintf(fnMsg, "first read %9u to tig %7u --", read.ident, fnTig->id());
writeLog("dropDead()-- tig %u gets first read %u\n", fnTig->id(), read.ident);
fnTig->addRead(read, -read.position.min(), false);
} else if (read.ident == ln) {
sprintf(lnMsg, "-- last read %9u to tig %7u", read.ident, lnTig->id());
writeLog("dropDead()-- tig %u gets last read %u\n", lnTig->id(), read.ident);
lnTig->addRead(read, -read.position.min(), false);
} else {
if (nnOff == INT32_MAX) {
sprintf(nnMsg, "other reads to tig %7u", nnTig->id());
writeLog("dropDead()-- tig %u gets all other reads\n", nnTig->id());
nnOff = read.position.min();
}
nnTig->addRead(read, -nnOff, false);
}
}
writeLog("dropDeadEnds()-- tig %7u --> %s %s %s\n", tig->id(), fnMsg, nnMsg, lnMsg);
if (fnTig) fnTig->cleanUp(); // Probably not needed, but cheap.
if (lnTig) lnTig->cleanUp(); // Probably not needed, but cheap.
if (nnTig) nnTig->cleanUp(); // Most likely needed.
......@@ -292,6 +324,6 @@ dropDeadEnds(AssemblyGraph *AG,
tigs[ti] = NULL;
}
writeStatus("dropDeadEnds()-- Modified %u tigs. Dropped %u first and %u last reads, %u tig%s had both reads dropped.\n",
writeStatus("dropDead()-- Modified %u tigs. Dropped %u first and %u last reads, %u tig%s had both reads dropped.\n",
numT, numF, numL, numB, (numB == 1) ? "" : "s");
}
......@@ -708,6 +708,9 @@ findConfusedEdges(TigVector &tigs,
continue;
// Skip if this overlap is the best we're trying to match.
//
// NOTE. This doesn't care about potential duplicate overlaps between a pair of reads,
// as we're looking for thicker overlaps off only one end of the read.
if ((rdBid == b5->readId()) ||
(rdBid == b3->readId()))
continue;
......
......@@ -73,8 +73,15 @@ typedef map<uint32, vector<uint32> > BubTargetList;
// Decide which tigs can be orphans. Any unitig where (nearly) every dovetail read has an overlap
// to some other unitig is a candidate for orphan popping.
// Decide which tigs can be orphans. Any unitig where (nearly) every dovetail
// read has an overlap to some other unitig is a candidate for orphan popping.
//
// Counts the number of reads that have an overlap to some other tig
// (tigOlapsTo). If more than half the reads in the tig have an overlap to
// some other tig, it is a potential place to pop the bubble.
//
// Returns BubTargetList, a map of uint32 to vector<uint32>, of the potential
// places that some tig could be popped into.
void
findPotentialOrphans(TigVector &tigs,
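(The rest of the declaration and the body of findPotentialOrphans() sit outside this hunk.) As a rough sketch of the counting described in the comment above, built only from names used elsewhere in this diff (OC->getOverlaps(), TigVector::inUnitig(), Unitig::ufpath) plus a presumed tigs.size() accessor, and not the upstream implementation:
BubTargetList
sketch_findPotentialOrphans(TigVector &tigs) {
  BubTargetList  targets;

  for (uint32 ti=0; ti<tigs.size(); ti++) {
    Unitig  *tig = tigs[ti];

    if (tig == NULL)
      continue;

    map<uint32, uint32>  tigOlapsTo;                    //  other tig id -> reads with an overlap to it

    for (uint32 rr=0; rr<tig->ufpath.size(); rr++) {
      uint32       no  = 0;
      BAToverlap  *ovl = OC->getOverlaps(tig->ufpath[rr].ident, no);
      set<uint32>  seen;                                //  count each other tig at most once per read

      for (uint32 oo=0; oo<no; oo++) {
        uint32  ot = tigs.inUnitig(ovl[oo].b_iid);

        if ((ot != 0) && (ot != ti))
          seen.insert(ot);
      }

      for (set<uint32>::iterator it=seen.begin(); it != seen.end(); ++it)
        tigOlapsTo[*it]++;
    }

    for (map<uint32, uint32>::iterator it=tigOlapsTo.begin(); it != tigOlapsTo.end(); ++it)
      if (2 * it->second > tig->ufpath.size())          //  more than half the reads reach that tig
        targets[ti].push_back(it->first);
  }

  return(targets);
}
How containment overlaps and the "(nearly) every dovetail read" qualifier are handled is not visible here, so the real function almost certainly filters more carefully than this sketch does.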
......@@ -617,6 +624,9 @@ mergeOrphans(TigVector &tigs,
vector<candidatePop *> targets;
for (map<uint32, intervalList<uint32> *>::iterator it=targetIntervals.begin(); it != targetIntervals.end(); ++it)
if (tigs[it->first] == NULL)
writeLog("mergeOrphans()-- orphan %u wants to go into nonexistent tig %u!\n", ti, it->first);
else
saveCorrectlySizedInitialIntervals(orphan,
tigs[it->first], // The targetID in targetIntervals
it->second, // The interval list in targetIntervals
......@@ -657,7 +667,7 @@ mergeOrphans(TigVector &tigs,
targets[tt]->placed[op].frgID,
targets[tt]->placed[op].position.bgn, targets[tt]->placed[op].position.end);
writeLog("mergeOrphans()-- tig %8u length %9u -> target %8u piece %2u position %9u-%-9u length %8u - expected %3" F_SIZE_TP " reads, had %3" F_SIZE_TP " reads.\n",
writeLog("mergeOrphans()-- tig %8u length %9u -> target %8u piece %2u position %9u-%-9u length %8u - expected %3" F_U32 " reads, had %3" F_U32 " reads.\n",
orphan->id(), orphan->getLength(),
targets[tt]->target->id(), tt, targets[tt]->bgn, targets[tt]->end, targets[tt]->end - targets[tt]->bgn,
orphanSize, targetSize);
......
......@@ -59,6 +59,77 @@ public:
// Decide if two reads in a tig are compatible with an overlap.
//
// Returns true if the reads are overlapping in the tig, and are oriented consistent with the overlap.
//
bool
Unitig::optimize_isCompatible(uint32 ii,
uint32 jj,
BAToverlap &olap,
bool inInit,
bool secondPass,
bool beVerbose) {
SeqInterval &ip = ufpath[ii].position;
SeqInterval &jp = ufpath[jj].position;
bool isOvl = isOverlapping(ufpath[ii].position, ufpath[jj].position);
// Decide if the overlap preserves the ordering in the tig.
//
// This is of questionable value, and might be incorrect. We can have two overlaps between
// a pair of reads iff the orientation differs - the flipped flag. That gets tested elsewhere.
bool isOrdered = true;
#if 1
bool isOvlLo = ((jp.min() <= ip.min()) && (ip.min() <= jp.max()) && (jp.max() <= ip.max()));
bool isOvlHi = ((ip.min() <= jp.min()) && (jp.min() <= ip.max()) && (ip.max() <= jp.max()));
//bool isOvlLo = jp.min() < ip.min(); // Is the above too rigorous?
//bool isOvlHi = ip.min() < jp.min();
// isOvlLo/isOvlHi tell which end of the read should have the overlap (according to the tig
// layout), so check if the overlap actually is on that end. If not, flag this as 'mis-ordered'.
if (((isOvlLo == true) && (ip.isForward()) && (olap.AEndIs5prime() == false)) ||
((isOvlLo == true) && (ip.isReverse()) && (olap.AEndIs3prime() == false)) ||
((isOvlHi == true) && (ip.isForward()) && (olap.AEndIs3prime() == false)) ||
((isOvlHi == true) && (ip.isReverse()) && (olap.AEndIs5prime() == false)))
isOrdered = false;
// But if in a containment relationship, it cannot be mis ordered.
if ((olap.AisContained() == true) ||
(olap.AisContainer() == true))
isOrdered = true;
#endif
bool isOriented = true;
if (((ip.isForward() == jp.isForward()) && (olap.flipped == true)) ||
((ip.isForward() != jp.isForward()) && (olap.flipped == false)))
isOriented = false;
if ((beVerbose) || (secondPass))
writeLog("optimize_%s()-- tig %7u read %9u (at %9d %9d) olap to read %9u (at %9d %9d) - hangs %7d %7d - %s %s ovlLo %d ovlHi %d contained %d container %d\n",
(inInit) ? "initPlace" : "recompute",
id(),
ufpath[ii].ident, ip.bgn, ip.end,
ufpath[jj].ident, jp.bgn, jp.end,
olap.a_hang, olap.b_hang,
(isOvl == true) ? "overlapping" : "not-overlapping",
(isOrdered == true) ? "ordered" : "mis-ordered",
isOvlLo, isOvlHi, olap.AisContained(), olap.AisContainer());
return((isOvl == true) &&
(isOrdered == true) &&
(isOriented == true));
}
void
Unitig::optimize_initPlace(uint32 ii,
optPos *op,
......@@ -87,37 +158,18 @@ Unitig::optimize_initPlace(uint32 ii,
uint32 uu = inUnitig (jid);
uint32 jj = ufpathIdx(jid);
// Probably overkill, but report ALL overlaps for the troubling reads.
if ((beVerbose) || (firstPass == false))
writeLog("optimize_initPlace()-- olap %u a %u b %u hangs %d %d\n", oo, iid, jid, ovl[oo].a_hang, ovl[oo].b_hang);
if (uu != id()) // Skip if the overlap is to a different tig.
continue; // (the ufpathIdx() call is valid, but using it isn't)
// Reads are in the same tig. Decide if they overlap in position.
bool isOvl = isOverlapping(ufpath[ii].position, ufpath[jj].position);
// Log! beVerbose should be true for the second pass, but just in case it isn't.
if (uu != id()) // Skip if to a different tig.
continue; // (otherwise, ufpath[jj] is invalid below)
if ((beVerbose) || (firstPass == false))
writeLog("optimize_initPlace()-- olap %4u tig %7u read %8u (at %9d %9d) olap to read %8u (at %9d %9d) - hangs %7d %7d - %s %s\n",
oo, id(),
iid, ufpath[ii].position.bgn, ufpath[ii].position.end,
jid, ufpath[jj].position.bgn, ufpath[jj].position.end,
ovl[oo].a_hang, ovl[oo].b_hang,
(isOvl == true) ? "overlapping" : "not-overlapping",
(jj > ii) ? "after" : "before");
if (isOvl == false) // Skip if the reads
continue; // don't overlap
// Skip if:
// this is the first pass, but the overlap is to a read after us.
// the overlap isn't compatible with the layout.
if ((firstPass) && (jj > ii)) // We're setting initial positions, so overlaps to reads after
continue; // us aren't correct, unless we're in the 2nd pass
if (((firstPass) && (ii < jj)) ||
(optimize_isCompatible(ii, jj, ovl[oo], true, !firstPass, beVerbose) == false))
continue;
// Reads overlap. Compute the position of the read using
// the overlap and the other read.
// Compute the position of the read using the overlap and the other read.
nmin += (op[iid].fwd) ? (op[jid].min - ovl[oo].a_hang) : (op[jid].min + ovl[oo].b_hang);
cnt += 1;
......@@ -176,8 +228,9 @@ Unitig::optimize_recompute(uint32 iid,
uint32 cnt = 0;
if (beVerbose) {
writeLog("optimize()-- tig %8u read %8u previous - %9.2f-%-9.2f\n", id(), iid, op[iid].min, op[iid].max);
writeLog("optimize()-- tig %8u read %8u length - %9.2f-%-9.2f\n", id(), iid, op[iid].max - readLen, op[iid].min + readLen);
writeLog("optimize()--\n");
writeLog("optimize()-- tig %8u read %9u previous - %9.2f-%-9.2f\n", id(), iid, op[iid].min, op[iid].max);
writeLog("optimize()-- tig %8u read %9u length - %9.2f-%-9.2f\n", id(), iid, op[iid].max - readLen, op[iid].min + readLen);
}
// Process all overlaps.
......@@ -187,26 +240,34 @@ Unitig::optimize_recompute(uint32 iid,
uint32 uu = inUnitig (jid);
uint32 jj = ufpathIdx(jid);
if (uu != id()) // Skip if the overlap is to a different tig.
continue; // (the ufpathIdx() call is valid, but using it isn't)
if (uu != id()) // Skip if to a different tig.
continue; // (otherwise, ufpath[jj] is invalid below)
if (isOverlapping(ufpath[ii].position, ufpath[jj].position) == false) // Skip if the reads
continue; // don't overlap
// Reads overlap. Compute the position of the read using
// the overlap and the other read.
// Compute the position of the read using the overlap and the other read.
double tmin = (op[iid].fwd) ? (op[jid].min - ovl[oo].a_hang) : (op[jid].min + ovl[oo].b_hang);
double tmax = (op[iid].fwd) ? (op[jid].max - ovl[oo].b_hang) : (op[jid].max + ovl[oo].a_hang);
if (beVerbose)
writeLog("optimize()-- tig %8u read %8u olap %4u - %9.2f-%-9.2f\n", id(), iid, oo, tmin, tmax);
writeLog("optimize()-- tig %8u read %9u olap %4u - %9.2f-%-9.2f\n", id(), iid, oo, tmin, tmax);
// Skip if the overlap isn't compatible with the layout.
if (optimize_isCompatible(ii, jj, ovl[oo], false, false, beVerbose) == false)
continue;
// Update estimate.
nmin += tmin;
nmax += tmax;
cnt += 1;
} // over all overlaps
}
if (cnt == 0) {
writeLog("Failed to optimize read %u in tig %u\n", iid, id());
fprintf(stderr, "Failed to optimize read %u in tig %u\n", iid, id());
flushLog();
}
assert(cnt > 0);
// Add in some evidence for the bases in the read. We want higher weight than the overlaps,
......@@ -226,7 +287,7 @@ Unitig::optimize_recompute(uint32 iid,
double dmax = 2 * (op[iid].max - np[iid].max) / (op[iid].max + np[iid].max);
double npll = np[iid].max - np[iid].min;
writeLog("optimize()-- tig %8u read %8u - %9.2f-%-9.2f length %9.2f/%-6d %7.2f%% posChange %+6.4f %+6.4f\n",
writeLog("optimize()-- tig %8u read %9u - %9.2f-%-9.2f length %9.2f/%-6d %7.2f%% posChange %+6.4f %+6.4f\n",
id(), iid,
np[iid].min, np[iid].max,
npll, readLen,
......@@ -237,8 +298,6 @@ Unitig::optimize_recompute(uint32 iid,
void
Unitig::optimize_expand(optPos *op) {
......@@ -300,7 +359,7 @@ Unitig::optimize_setPositions(optPos *op,
if (op[iid].fwd) {
if (beVerbose)
writeLog("optimize()-- read %8u -> from %9d,%-9d %7d to %9d,%-9d %7d readLen %7d diff %7.4f%%\n",
writeLog("optimize()-- read %9u -> from %9d,%-9d %7d to %9d,%-9d %7d readLen %7d diff %7.4f%%\n",
iid,
ufpath[ii].position.bgn,
ufpath[ii].position.end,
......@@ -315,7 +374,7 @@ Unitig::optimize_setPositions(optPos *op,
ufpath[ii].position.end = (int32)op[iid].max;
} else {
if (beVerbose)
writeLog("optimize()-- read %8u <- from %9d,%-9d %7d to %9d,%-9d %7d readLen %7d diff %7.4f%%\n",
writeLog("optimize()-- read %9u <- from %9d,%-9d %7d to %9d,%-9d %7d readLen %7d diff %7.4f%%\n",
iid,
ufpath[ii].position.bgn,
ufpath[ii].position.end,
......@@ -360,12 +419,13 @@ TigVector::optimizePositions(const char *prefix, const char *label) {
for (uint32 fi=0; fi<fiLimit; fi++) {
uint32 ti = inUnitig(fi);
uint32 pp = ufpathIdx(fi);
Unitig *tig = operator[](ti);
if (ti == 0)
if (tig == NULL)
continue;
op[fi].set(operator[](ti)->ufpath[pp]);
np[fi].set(operator[](ti)->ufpath[pp]);
op[fi].set(tig->ufpath[pp]);
np[fi].set(tig->ufpath[pp]);
}
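The optPos records filled in by this loop are not defined anywhere in this diff. Judging only from the fields referenced in this file (min, max, fwd, and a set() taking a ufNode), a minimal placeholder, offered purely as an assumption rather than canu's actual definition, would be:
struct optPos {
  void    set(ufNode &n) {
    min = n.position.min();           //  current begin coordinate in the tig
    max = n.position.max();           //  current end coordinate in the tig
    fwd = n.position.isForward();     //  orientation of the read in the tig
  };

  double  min;    //  positions are kept as doubles so the averaged estimates
  double  max;    //  in optimize_recompute() do not truncate between passes
  bool    fwd;
};
One record per read ID (indexed as op[fi] and np[fi]) matches the fiLimit-sized loops above and below.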
// Compute initial positions using previously placed reads and the read length.
......@@ -383,7 +443,7 @@ TigVector::optimizePositions(const char *prefix, const char *label) {
Unitig *tig = operator[](ti);
set<uint32> failed;
if (tig == NULL)
if ((tig == NULL) || (tig->ufpath.size() == 1))
continue;
for (uint32 ii=0; ii<tig->ufpath.size(); ii++)
......@@ -407,11 +467,12 @@ TigVector::optimizePositions(const char *prefix, const char *label) {
#pragma omp parallel for schedule(dynamic, fiBlockSize)
for (uint32 fi=0; fi<fiLimit; fi++) {
uint32 ti = inUnitig(fi);
Unitig *tig = operator[](ti);
if (ti == 0)
if ((tig == NULL) || (tig->ufpath.size() == 1))
continue;
operator[](ti)->optimize_recompute(fi, op, np, beVerbose);
tig->optimize_recompute(fi, op, np, beVerbose);
}
// Reset zero
......@@ -421,7 +482,7 @@ TigVector::optimizePositions(const char *prefix, const char *label) {
for (uint32 ti=0; ti<tiLimit; ti++) {
Unitig *tig = operator[](ti);
if (tig == NULL)
if ((tig == NULL) || (tig->ufpath.size() == 1))
continue;
int32 z = np[ tig->ufpath[0].ident ].min;
......@@ -479,7 +540,7 @@ TigVector::optimizePositions(const char *prefix, const char *label) {
for (uint32 ti=0; ti<tiLimit; ti++) {
Unitig *tig = operator[](ti);
if (tig == NULL)
if ((tig == NULL) || (tig->ufpath.size() == 1))
continue;
tig->optimize_expand(op);
......@@ -494,7 +555,7 @@ TigVector::optimizePositions(const char *prefix, const char *label) {
for (uint32 ti=0; ti<tiLimit; ti++) {
Unitig *tig = operator[](ti);
if (tig == NULL)
if ((tig == NULL) || (tig->ufpath.size() == 1))
continue;
tig->optimize_setPositions(op, beVerbose);
......
......@@ -344,7 +344,7 @@ uint32
OverlapCache::filterDuplicates(uint32 &no) {
uint32 nFiltered = 0;
for (uint32 ii=0, jj=1; jj<no; ii++, jj++) {
for (uint32 ii=0, jj=1, dd=0; jj<no; ii++, jj++) {
if (_ovs[ii].b_iid != _ovs[jj].b_iid)
continue;
......@@ -352,22 +352,35 @@ OverlapCache::filterDuplicates(uint32 &no) {
nFiltered++;
// Drop the shorter overlap, or the one with the higher erate.
// Drop the weaker overlap. If a tie, drop the flipped one.
uint32 iilen = RI->overlapLength(_ovs[ii].a_iid, _ovs[ii].b_iid, _ovs[ii].a_hang(), _ovs[ii].b_hang());
uint32 jjlen = RI->overlapLength(_ovs[jj].a_iid, _ovs[jj].b_iid, _ovs[jj].a_hang(), _ovs[jj].b_hang());
double iiSco = RI->overlapLength(_ovs[ii].a_iid, _ovs[ii].b_iid, _ovs[ii].a_hang(), _ovs[ii].b_hang()) * _ovs[ii].erate();
double jjSco = RI->overlapLength(_ovs[jj].a_iid, _ovs[jj].b_iid, _ovs[jj].a_hang(), _ovs[jj].b_hang()) * _ovs[jj].erate();
if (iilen == jjlen) {
if (_ovs[ii].evalue() < _ovs[jj].evalue())
jjlen = 0;
else
iilen = 0;
if (iiSco == jjSco) { // Hey gcc! See how nice I was by putting brackets
if (_ovs[ii].flipped()) // around this so you don't get confused by the
iiSco = 0; // non-ambiguous ambiguous else clause?
else //
jjSco = 0; // You're welcome.
}
if (iilen < jjlen)
_ovs[ii].a_iid = _ovs[ii].b_iid = 0;
if (iiSco < jjSco)
dd = ii;
else
_ovs[jj].a_iid = _ovs[jj].b_iid = 0;
dd = jj;
#if 0
writeLog("OverlapCache::filterDuplicates()-- Dropping overlap A: %9" F_U64P " B: %9" F_U64P " - %6.4f%% - %6" F_S32P " %6" F_S32P " - %s\n",
_ovs[dd].a_iid,
_ovs[dd].b_iid,
_ovs[dd].a_hang(),
_ovs[dd].b_hang(),
_ovs[dd].erate(),
_ovs[dd].flipped() ? "flipped" : "");
#endif
_ovs[dd].a_iid = 0;
_ovs[dd].b_iid = 0;
}
// If nothing was filtered, return.
......@@ -616,30 +629,29 @@ OverlapCache::loadOverlaps(ovStore *ovlStore, bool doSave) {
// Binary search a list of overlaps for one matching bID and flipped.
bool
searchForOverlap(BAToverlap *ovl, uint32 ovlLen, uint32 bID) {
searchForOverlap(BAToverlap *ovl, uint32 ovlLen, uint32 bID, bool flipped) {
int32 F = 0;
int32 L = ovlLen - 1;
int32 M = 0;
#ifdef TEST_LINEAR_SEARCH
bool linearSearchFound = false;
for (uint32 ss=0; ss<ovlLen; ss++)
if (ovl[ss].b_iid == bID) {
if ((ovl[ss].b_iid == bID) &&
(ovl[ss].flipped == flipped)) {
linearSearchFound = true;
break;
}
#endif
// If not, these are repeats and we should binary search everything.
// There will be no short lists where we could exhaustively search.
int32 F = 0;
int32 L = ovlLen - 1;
int32 M = 0;
while (F <= L) {
M = (F + L) / 2;
if (ovl[M].b_iid == bID) {
if ((ovl[M].b_iid == bID) &&
(ovl[M].flipped == flipped)) {
ovl[M].symmetric = true;
#ifdef TEST_LINEAR_SEARCH
assert(linearSearchFound == true);
......@@ -647,7 +659,8 @@ searchForOverlap(BAToverlap *ovl, uint32 ovlLen, uint32 bID) {
return(true);
}
if (ovl[M].b_iid < bID)
if (((ovl[M].b_iid < bID)) ||
((ovl[M].b_iid == bID) && (ovl[M].flipped < flipped)))
F = M+1;
else
L = M-1;
......@@ -694,7 +707,7 @@ OverlapCache::symmetrizeOverlaps(void) {
// Search for the twin overlap, and if found, we're done. The twin is marked as symmetric in the function.
if (searchForOverlap(_overlaps[rb], _overlapLen[rb], rr)) {
if (searchForOverlap(_overlaps[rb], _overlapLen[rb], rr, _overlaps[rr][oo].flipped)) {
_overlaps[rr][oo].symmetric = true;
continue;
}
......
......@@ -65,6 +65,11 @@ breakSingletonTigs(TigVector &tigs) {
if (utg->ufpath.size() > 1)
continue;
if (OG->isZombie(utg->ufpath[0].ident) == true) {
writeLog("Not breaking sinleton zombie tig %u with read %u\n", ti, utg->ufpath[0].ident);
continue;
}
tigs[ti] = NULL; // Remove the tig from the list
tigs.registerRead(utg->ufpath[0].ident); // Eject the read
delete utg; // Reclaim space
......@@ -125,6 +130,16 @@ placeUnplacedUsingAllOverlaps(TigVector &tigs,
placeReadUsingOverlaps(tigs, NULL, fid, placements, placeRead_fullMatch);
// If all placements are in singletons, allow them. If any placement is to a 'real' tig,
// ignore singleton placements.
bool ignoreSingleton = false;
for (uint32 i=0; i<placements.size(); i++)
if ((placements[i].fCoverage >= 0.99) &&
(tigs[placements[i].tigID]->ufpath.size() > 1))
ignoreSingleton = true;
// Search the placements for the highest expected identity placement using all overlaps in the unitig.
uint32 b = UINT32_MAX;
......@@ -135,7 +150,8 @@ placeUnplacedUsingAllOverlaps(TigVector &tigs,
if (placements[i].fCoverage < 0.99) // Ignore partially placed reads.
continue;
if (tig->ufpath.size() == 1) // Ignore placements in singletons.
if ((ignoreSingleton == true) &&
(tig->ufpath.size() == 1)) // Ignore placements in singletons.
continue;
uint32 bgn = placements[i].position.min();
......
......@@ -119,8 +119,11 @@ populateUnitig(TigVector &tigs,
int32 fi) {
if ((RI->readLength(fi) == 0) || // Skip deleted
(tigs.inUnitig(fi) != 0) || // Skip placed
(OG->isContained(fi) == true)) // Skip contained
(tigs.inUnitig(fi) != 0)) // Skip placed
return;
if ((OG->isContained(fi) == true) && // Skip contained...
(OG->isZombie(fi) == false)) // that aren't zombies.
return;
Unitig *utg = tigs.newUnitig(logFileFlagSet(LOG_BUILD_UNITIG));
......@@ -140,6 +143,22 @@ populateUnitig(TigVector &tigs,
utg->addRead(read, 0, logFileFlagSet(LOG_BUILD_UNITIG));
// If suspicious or a zombie, don't bother trying to extend. In the former
// case, we don't want to extend, and in the latter case, there isn't anything
// to extend.
if (OG->isSuspicious(fi)) {
writeLog("Stopping unitig construction of suspicious read %d in unitig %d\n",
utg->ufpath.back().ident, utg->id());
return;
}
if (OG->isZombie(fi)) {
writeLog("Stopping unitig construction of zombie read %d in unitig %d\n",
utg->ufpath.back().ident, utg->id());
return;
}
// Add reads as long as there is a path to follow...from the 3' end of the first read.
BestEdgeOverlap *bestedge5 = OG->getBestEdgeOverlap(fi, false);
......@@ -150,34 +169,6 @@ populateUnitig(TigVector &tigs,
assert(bestedge3->ahang() >= 0);
assert(bestedge3->bhang() >= 0);
// If this read is not covered by the two best overlaps we are finished. We will not follow
// the paths out. This indicates either low coverage, or a chimeric read. If it is low
// coverage, then the best overlaps will be mutual and we'll recover the same path. If it is a
// chimeric read the overlaps will not be mutual and we will skip this read.
//
// The amount of our read that is covered by the two best overlaps is
//
// (readLen + bestedge5->bhang()) + (readLen - bestedge3->ahang())
//
// If that is not significantly longer than the read length, then we will not use this
// read as a seed for unitig construction.
//
if (OG->isSuspicious(fi))
return;
#if 0
uint32 covered = RI->readLength(fi) + bestedge5->bhang() + RI->readLength(fi) - bestedge3->ahang();
// This breaks tigs at 0x best-coverage regions. There might be a contain that spans (joins)
// the two best overlaps to verify the read, but we can't easily tell right now.
if (covered < RI->readLength(fi) + AS_OVERLAP_MIN_LEN / 2) {
writeLog("Stopping unitig construction of suspicious read %d in unitig %d\n",
utg->ufpath.back().ident, utg->id());
return;
}
#endif
if (logFileFlagSet(LOG_BUILD_UNITIG))
writeLog("Adding 5' edges off of read %d in unitig %d\n",
utg->ufpath.back().ident, utg->id());
......
......@@ -40,6 +40,7 @@
#include "AS_global.H"
#include "AS_BAT_TigVector.H"
#include "AS_BAT_OverlapCache.H"
#include "stddev.H"
......@@ -204,6 +205,13 @@ public:
void cleanUp(void);
// Recompute bgn/end positions using all overlaps.
bool optimize_isCompatible(uint32 ii,
uint32 jj,
BAToverlap &olap,
bool inInit,
bool secondPass,
bool beVerbose);
void optimize_initPlace(uint32 pp,
optPos *op,
optPos *np,
......
......@@ -32,7 +32,7 @@ my $cwd = getcwd();
my $label = "snapshot"; # Automagically set to 'release' for releases.
my $major = "1"; # Bump before release.
my $minor = "7"; # Bump before release.
my $minor = "7.1"; # Bump before release.
my $commits = "0";
my $hash1 = undef; # This from 'git describe'
......
......@@ -60,9 +60,6 @@ Read_Frags(feParameters *G,
uint64 votesLength = 0;
uint64 readsLoaded = 0;
fprintf(stderr, "Read_Frags()-- from " F_U32 " through " F_U32 "\n",
G->bgnID, G->endID);
for (uint32 curID=G->bgnID; curID<=G->endID; curID++) {
gkRead *read = gkpStore->gkStore_getRead(curID);
......@@ -74,11 +71,7 @@ Read_Frags(feParameters *G,
sizeof(Vote_Tally_t) * votesLength +
sizeof(Frag_Info_t) * G->readsLen);
fprintf(stderr, "Read_Frags()-- allocate " F_U64 " MB for bases, votes and info, for %u reads of total length " F_U64 " (%.2f MB)\n",
totAlloc >> 20,
G->endID - G->bgnID + 1,
basesLength,
totAlloc / 1024.0 / 1024.0);
fprintf(stderr, "Read_Frags()-- Loading target reads " F_U32 " through " F_U32 " with " F_U64 " bases.\n", G->bgnID, G->endID, basesLength);
G->readBases = new char [basesLength];
G->readVotes = new Vote_Tally_t [votesLength]; // NO constructor, MUST INIT
......@@ -122,6 +115,6 @@ Read_Frags(feParameters *G,
delete readData;
fprintf(stderr, "Read_Frags()-- from " F_U32 " through " F_U32 " -- loaded " F_U64 " bases in " F_U64 " reads.\n",
G->bgnID, G->endID-1, basesLength, readsLoaded);
fprintf(stderr, "Read_Frags()-- %.3f GB for bases, votes and info.\n", totAlloc / 1024.0 / 1024.0 / 1024.0);
fprintf(stderr, "\n");
}
......@@ -41,8 +41,7 @@ Read_Olaps(feParameters *G, gkStore *gkpStore) {
uint64 numolaps = ovs->numOverlapsInRange();
fprintf(stderr, "Read_Olaps()-- loading " F_U64 " overlaps (%.2f MB).\n",
numolaps, sizeof(Olap_Info_t) * numolaps / 1024.0 / 1024.0);
fprintf(stderr, "Read_Olaps()-- Loading " F_U64 " overlaps.\n", numolaps);
G->olaps = new Olap_Info_t [numolaps];
G->olapsLen = 0;
......@@ -68,6 +67,9 @@ Read_Olaps(feParameters *G, gkStore *gkpStore) {
}
delete ovs;
fprintf(stderr, "Read_Olaps()-- %.3f GB for overlaps..\n", sizeof(Olap_Info_t) * numolaps / 1024.0 / 1024.0 / 1024.0);
fprintf(stderr, "\n");
}
......@@ -51,24 +51,21 @@ Output_Corrections(feParameters *G);
// From overlapInCore.C
int
Binomial_Bound(int e, double p, int Start, double Limit);
// Read fragments lo_frag..hi_frag (INCLUSIVE) from store and save the ids and sequences of those
// with overlaps to fragments in global Frag .
static
void
Extract_Needed_Frags(feParameters *G,
extractReads(feParameters *G,
gkStore *gkpStore,
uint32 loID,
uint32 hiID,
Frag_List_t *fl,
uint64 &nextOlap) {
// Clear the buffer.
fl->readsLen = 0;
fl->basesLen = 0;
// The original converted to lowercase, and made non-acgt be 'a'.
char filter[256];
......@@ -81,35 +78,44 @@ Extract_Needed_Frags(feParameters *G,
filter['G'] = filter['g'] = 'g';
filter['T'] = filter['t'] = 't';
// Count the amount of stuff we're loading.
// Return if we've exhausted the overlaps.
fl->readsLen = 0;
fl->basesLen = 0;
if (nextOlap >= G->olapsLen)
return;
// Count the amount of stuff we're loading.
uint64 lastOlap = nextOlap;
uint32 ii = 0; // Index into reads arrays
uint32 fi = G->olaps[lastOlap].b_iid; // Actual ID we're extracting
uint32 loID = G->olaps[lastOlap].b_iid; // Actual ID we're extracting
uint32 hiID = loID;
uint64 maxBases = 512 * 1024 * 1024;
assert(loID <= fi);
// Find the highest read ID that we can load without exceeding maxBases.
fprintf(stderr, "\n");
fprintf(stderr, "Extract_Needed_Frags()-- Loading used reads between " F_U32 " and " F_U32 ", at overlap " F_U64 ".\n", fi, hiID, lastOlap);
while ((fl->basesLen < maxBases) &&
(lastOlap < G->olapsLen)) {
hiID = G->olaps[lastOlap].b_iid; // Grab the ID of the overlap we're at.
while (fi <= hiID) {
gkRead *read = gkpStore->gkStore_getRead(fi);
gkRead *read = gkpStore->gkStore_getRead(hiID); // Grab that read.
fl->readsLen += 1;
fl->readsLen += 1; // Add the read to our set.
fl->basesLen += read->gkRead_sequenceLength() + 1;
// Advance to the next overlap
lastOlap++;
while ((lastOlap < G->olapsLen) && (G->olaps[lastOlap].b_iid == fi))
lastOlap++;
fi = (lastOlap < G->olapsLen) ? G->olaps[lastOlap].b_iid : hiID + 1;
lastOlap++; // Advance to the next overlap
while ((lastOlap < G->olapsLen) && //
(G->olaps[lastOlap].b_iid == hiID)) // If we've exceeded the max size or hit the last overlap,
lastOlap++; // the loop will stop on the next iteration.
}
fprintf(stderr, "Extract_Needed_Frags()-- Loading reads for overlaps " F_U64 " to " F_U64 " (reads " F_U32 " bases " F_U64 ")\n", nextOlap, lastOlap, fl->readsLen, fl->basesLen);
// If nothing to load, just return.
if (fl->readsLen == 0)
return;
// Report what we're going to do.
fprintf(stderr, "extractReads()-- Loading reads " F_U32 " to " F_U32 " (" F_U32 " reads with " F_U64 " bases) overlaps " F_U64 " through " F_U64 ".\n",
loID, hiID, fl->readsLen, fl->basesLen, nextOlap, lastOlap);
// Ensure there is space.
......@@ -117,42 +123,31 @@ Extract_Needed_Frags(feParameters *G,
delete [] fl->readIDs;
delete [] fl->readBases;
//fprintf(stderr, "Extract_Needed_Frags()-- realloc reads from " F_U32 " to " F_U32 "\n", fl->readsMax, 12 * fl->readsLen / 10);
fl->readIDs = new uint32 [12 * fl->readsLen / 10];
fl->readBases = new char * [12 * fl->readsLen / 10];
fl->readsMax = 12 * fl->readsLen / 10;
fl->readIDs = new uint32 [fl->readsMax];
fl->readBases = new char * [fl->readsMax];
}
if (fl->basesMax < fl->basesLen) {
delete [] fl->bases;
//fprintf(stderr, "Extract_Needed_Frags()-- realloc bases from " F_U64 " to " F_U64 "\n", fl->basesMax, 12 * fl->basesLen / 10);
fl->bases = new char [12 * fl->basesLen / 10];
fl->basesMax = 12 * fl->basesLen / 10;
fl->bases = new char [fl->basesMax];
}
// Load. This is complicated by loading only the reads that have overlaps we care about.
fl->readsLen = 0;
fl->basesLen = 0;
// Load the sequence data for reads loID to hiID, as long as the read has an overlap.
gkReadData *readData = new gkReadData;
ii = 0;
fi = G->olaps[nextOlap].b_iid;
assert(loID <= fi);
fl->readsLen = 0;
fl->basesLen = 0;
while (fi <= hiID) {
gkRead *read = gkpStore->gkStore_getRead(fi);
while ((loID <= hiID) &&
(nextOlap < G->olapsLen)) {
gkRead *read = gkpStore->gkStore_getRead(loID);
fl->readIDs[ii] = fi;
fl->readBases[ii] = fl->bases + fl->basesLen;
fl->basesLen += read->gkRead_sequenceLength() + 1;
fl->readIDs[fl->readsLen] = loID; // Save the ID of _this_ read.
fl->readBases[fl->readsLen] = fl->bases + fl->basesLen; // Set the data pointer to where this read should start.
gkpStore->gkStore_loadReadData(read, readData);
......@@ -160,31 +155,25 @@ Extract_Needed_Frags(feParameters *G,
char *readBases = readData->gkReadData_getSequence();
for (uint32 bb=0; bb<readLen; bb++)
fl->readBases[ii][bb] = filter[readBases[bb]];
fl->readBases[fl->readsLen][bb] = filter[readBases[bb]];
fl->readBases[ii][readLen] = 0; // All good reads end.
fl->readBases[fl->readsLen][readLen] = 0; // All good reads end.
ii++;
fl->basesLen += read->gkRead_sequenceLength() + 1; // Update basesLen to account for this read.
fl->readsLen += 1; // And note that we loaded a read.
// Advance to the next overlap.
nextOlap++;
while ((nextOlap < G->olapsLen) && (G->olaps[nextOlap].b_iid == fi))
nextOlap++; // Advance past all the overlaps for this read.
while ((nextOlap < G->olapsLen) &&
(G->olaps[nextOlap].b_iid == loID))
nextOlap++;
fi = (nextOlap < G->olapsLen) ? G->olaps[nextOlap].b_iid : hiID + 1;
if (nextOlap < G->olapsLen) // If we have valid overlap, grab the read ID.
loID = G->olaps[nextOlap].b_iid; // If we don't have a valid overlap, the loop will stop.
}
delete readData;
fl->readsLen = ii;
if (fl->readsLen > 0)
fprintf(stderr, "Extract_Needed_Frags()-- Loaded " F_U32 " reads (%.4f%%). Loaded IDs " F_U32 " through " F_U32 ".\n",
fl->readsLen, 100.0 * fl->readsLen / (hiID - 1 - loID),
fl->readIDs[0], fl->readIDs[fl->readsLen-1]);
else
fprintf(stderr, "Extract_Needed_Frags()-- Loaded " F_U32 " reads (%.4f%%).\n",
fl->readsLen, 100.0 * fl->readsLen / (hiID - 1 - loID));
fprintf(stderr, "extractReads()-- Loaded.\n");
}
......@@ -262,8 +251,6 @@ Threaded_Stream_Old_Frags(feParameters *G,
for (uint32 i=0; i<G->numThreads; i++) {
thread_wa[i].thread_id = i;
thread_wa[i].loID = 0;
thread_wa[i].hiID = 0;
thread_wa[i].nextOlap = 0;
thread_wa[i].G = G;
thread_wa[i].frag_list = NULL;
......@@ -278,14 +265,6 @@ Threaded_Stream_Old_Frags(feParameters *G,
thread_wa[i].ped.initialize(G, G->errorRate);
}
uint32 loID = G->olaps[0].b_iid;
uint32 hiID = loID + FRAGS_PER_BATCH - 1;
uint32 endID = G->olaps[G->olapsLen - 1].b_iid;
if (hiID > endID)
hiID = endID;
uint64 frstOlap = 0;
uint64 nextOlap = 0;
......@@ -295,15 +274,13 @@ Threaded_Stream_Old_Frags(feParameters *G,
Frag_List_t *curr_frag_list = &frag_list_1;
Frag_List_t *next_frag_list = &frag_list_2;
Extract_Needed_Frags(G, gkpStore, loID, hiID, curr_frag_list, nextOlap);
extractReads(G, gkpStore, curr_frag_list, nextOlap);
while (loID <= endID) {
while (curr_frag_list->readsLen > 0) {
// Process fragments in curr_frag_list in background
for (uint32 i=0; i<G->numThreads; i++) {
thread_wa[i].loID = loID;
thread_wa[i].hiID = hiID;
thread_wa[i].nextOlap = frstOlap;
thread_wa[i].frag_list = curr_frag_list;
......@@ -315,21 +292,14 @@ Threaded_Stream_Old_Frags(feParameters *G,
// Read next batch of fragments
loID = hiID + 1;
if (loID <= endID) {
hiID = loID + FRAGS_PER_BATCH - 1;
if (hiID > endID)
hiID = endID;
frstOlap = nextOlap;
Extract_Needed_Frags(G, gkpStore, loID, hiID, next_frag_list, nextOlap);
}
extractReads(G, gkpStore, next_frag_list, nextOlap);
// Wait for background processing to finish
fprintf(stderr, "processReads()-- Waiting for compute.\n");
for (uint32 i=0; i<G->numThreads; i++) {
void *ptr;
......
......@@ -72,10 +72,6 @@ using namespace std;
// Factor by which to grow memory in olap array when reading it
#define EXPANSION_FACTOR 1.4
// Number of old fragments to read into memory-based fragment
// store at a time for processing
#define FRAGS_PER_BATCH 100000
// Longest name allowed for a file in the overlap store
#define MAX_FILENAME_LEN 1000
......@@ -273,8 +269,6 @@ public:
struct Thread_Work_Area_t {
int32 thread_id;
uint32 loID;
uint32 hiID;
uint64 nextOlap;
feParameters *G;
......
......@@ -682,7 +682,7 @@ sub setOverlapDefaults ($$$) {
setOverlapDefault($tag, "OvlHashBlockLength", undef, "Amount of sequence (bp) to load into the overlap hash table");
setOverlapDefault($tag, "OvlRefBlockLength", undef, "Amount of sequence (bp) to search against the hash table per batch");
setOverlapDefault($tag, "OvlHashBits", ($tag eq "cor") ? 18 : 23, "Width of the kmer hash. Width 22=1gb, 23=2gb, 24=4gb, 25=8gb. Plus 10b per ${tag}OvlHashBlockLength");
setOverlapDefault($tag, "OvlHashBits", undef, "Width of the kmer hash. Width 22=1gb, 23=2gb, 24=4gb, 25=8gb. Plus 10b per ${tag}OvlHashBlockLength");
setOverlapDefault($tag, "OvlHashLoad", 0.75, "Maximum hash table load. If set too high, table lookups are inefficent; if too low, search overhead dominates run time; default 0.75");
setOverlapDefault($tag, "OvlMerSize", ($tag eq "cor") ? 19 : 22, "K-mer size for seeds in overlaps");
setOverlapDefault($tag, "OvlMerThreshold", "auto", "K-mer frequency threshold; mers more frequent than this count are ignored; default 'auto'");
......