Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille
--- a/debian/copyright
+++ b/debian/copyright
@@ -25,7 +25,6 @@ Files-Excluded:
    src/index
    src/intronerator
    src/isPcr
-    src/j*
    src/k*
    src/m*
    src/o*

--- a/debian/libucsc-utils.install
+++ b/debian/libucsc-utils.install
+src/lib/tests/bin/*/*	usr/bin
--- a/debian/rules
+++ b/debian/rules
@@ -13,11 +13,12 @@ include /usr/share/dpkg/default.mk
 # DEB_DISTRIBUTION: the distribution(s) listed in the current entry of debian/changelog

 # for hardening you might like to uncomment this:
-# export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+export DEB_BUILD_MAINT_OPTIONS=hardening=+all

 %:
 	dh $@ --sourcedirectory=src/lib --no-parallel

 override_dh_auto_build:
 	make --directory=src/htslib
+	make --directory=src/jkOwnLib
 	dh_auto_build
--- a/src/jkOwnLib/README
+++ b/src/jkOwnLib/README
+The modules in this library generally are supporting
+the BLAT or WABA programs.  These modules are all
+copyrighted Jim Kent. License hereby is granted for
+personal, academic and non-profit purposes.  Commercial
+use is permitted only by explicit agreement with Jim Kent.
--- a/src/jkOwnLib/bandExt.c
+++ b/src/jkOwnLib/bandExt.c
+/* bandExt - banded Smith-Waterman extension of alignments. 
+ * An aligner might first find perfectly matching hits of
+ * a small size, then extend these hits as far as possible
+ * while the sequences perfectly match, then call on routines
+ * in this module to do further extensions allowing small
+ * gaps and mismatches. 
+ *
+ * This version of the algorithm uses affine gap scorea and
+ * has the neat feature that the band can wander around.  
+ * When a score exceeds any previous score, the band will be 
+ * recentered around the highest scoring position. */
+/* Copyright 2003-5 Jim Kent.  All rights reserved. */
+
+#include "common.h"
+#include "dnaseq.h"
+#include "axt.h"
+#include "fuzzyFind.h"
+#include "localmem.h"
+#include "bandExt.h"
+
+
+/* Definitions for traceback byte.  This is encoded as so:
+ *     xxxxLUMM
+ * where the x's are not used.  The L bit is set if parent of the left insert
+ * is also a left insert (otherwise it is a match). Likewise the U bit is set
+ * if the parent of an up insert is also an up insert.  The MM bits contain the
+ * parent of the match state. */
+
+#define mpMatch 1
+#define mpUp 2
+#define mpLeft 3
+#define mpMask 3
+#define upExt (1<<2)
+#define lpExt (1<<3)
+
+struct score
+/* Info on score in our three states. */
+   {
+   int match;
+   int up;
+   int left;
+   };
+
+boolean bandExt(boolean global, struct axtScoreScheme *ss, int maxInsert,
+	char *aStart, int aSize, char *bStart, int bSize, int dir,
+	int symAlloc, int *retSymCount, char *retSymA, char *retSymB, 
+	int *retStartA, int *retStartB)
+/* Try to extend an alignment from aStart/bStart onwards.
+ * Set maxInsert to the maximum gap size allowed.  3 is often
+ * a good choice.  aStart/aSize bStart/bSize describe the
+ * sequence to extend through (not including any of the existing
+ * alignment. Set dir = 1 for forward extension, dir = -1 for backwards. 
+ * retSymA and retSymB should point to arrays of characters of
+ * symAlloc size.  symAlloc needs to be aSize*2 or so.  The
+ * alignment is returned in the various ret values.  The function
+ * overall returns TRUE if an extension occurred, FALSE if not. */
+{
+int i;			/* A humble index or two. */
+int *bOffsets = NULL;	/* Offset of top of band. */
+UBYTE **parents = NULL;	/* Array of parent positions. */
+struct score *curScores = NULL;	/* Scores for current column. */
+struct score *prevScores = NULL;	/* Scores for previous column. */
+struct score *swapScores;	/* Helps to swap cur & prev column. */
+int bandSize = 2*maxInsert + 1;	 /* Size of band including middle */
+int maxIns1 = maxInsert + 1;   	 /* Max insert plus one. */
+int bandPlus = bandSize + 2*maxIns1;  /* Band plus sentinels on either end. */
+int bestScore = 0;	 	 /* Best score so far. */
+int aPos, aBestPos = -1;	 /* Current and best position in a. */
+int bPos, bBestPos = -1;	 /* Current and best position in b. */
+int bandCenter = 0;	/* b Coordinate of current band center. */
+int colShift = 1;	/* Vertical shift between this column and previous. */
+int prevScoreOffset;	/* Offset into previous score column. */
+int curScoreOffset;	/* Offset into current score column. */
+int symCount = 0;	/* Size of alignment and size allocated for it. */
+int gapOpen = ss->gapOpen;	 /* First base in gap costs this. */
+int gapExtend = ss->gapExtend;	 /* Extra bases in gap cost this. */
+int badScore = -gapOpen*100;	 /* A score bad enough no one will want to link with us. */
+int maxDrop = gapOpen + gapExtend*maxInsert; /* Max score drop allowed before giving up. */
+int midScoreOff;		 /* Offset to middle of scoring array. */
+struct lm *lm;			 /* Local memory pool. */
+boolean didExt = FALSE;
+int initGapScore = -gapOpen;
+
+/* For reverse direction just reverse bytes here and there.  It's
+ * a lot easier than the alternative and doesn't cost much time in
+ * the global scheme of things. */
+if (dir < 0)
+    {
+    reverseBytes(aStart, aSize);
+    reverseBytes(bStart, bSize);
+    }
+#ifdef DEBUG
+uglyf("bandExt: dir %d, aStart %d, aSize %d, symAlloc %d\n", dir,
+	aSize, bSize, symAlloc);
+mustWrite(uglyOut, aStart, aSize);
+uglyf("\n");
+mustWrite(uglyOut, bStart, bSize);
+uglyf("\n");
+#endif /* DEBUG */
+
+/* Make up a local mem structure big enough for everything. 
+ * This is just a minor, perhaps misguided speed tweak to
+ * avoid multiple mallocs. */
+lm = lmInit(
+    sizeof(bOffsets[0])*aSize +
+    sizeof(parents[0])*bandSize +
+    bandSize*(sizeof(parents[0][0])*aSize) +
+    sizeof(curScores[0]) * bandPlus +
+    sizeof(prevScores[0]) * bandPlus );
+
+/* Allocate data structures out of local memory pool. */
+lmAllocArray(lm, bOffsets, aSize);
+lmAllocArray(lm, parents, bandSize);
+for (i=0; i<bandSize; ++i)
+    lmAllocArray(lm, parents[i], aSize);
+lmAllocArray(lm, curScores, bandPlus);
+lmAllocArray(lm, prevScores, bandPlus);
+
+/* Set up scoring arrays so that stuff outside of the band boundary 
+ * looks bad.  There will be maxIns+1 of these sentinel values at
+ * the start and at the beginning. */
+for (i=0; i<bandPlus; ++i)
+    {
+    struct score *cs = &curScores[i];
+    cs->match = cs->up = cs->left = badScore;
+    cs = &prevScores[i];
+    cs->match = cs->up = cs->left = badScore;
+    }
+
+/* Set up scoring array so that extending without an initial insert
+ * looks relatively good. */
+midScoreOff = 1 + 2 * maxInsert;
+prevScores[midScoreOff].match = 0;
+
+/* Set up scoring array so that initial inserts of up to maxInsert
+ * are allowed but penalized appropriately. */
+    {
+    int score = -gapOpen;
+    for (i=0; i<maxInsert; ++i)
+	{
+	prevScores[midScoreOff+i].up = score;
+	score -= gapExtend;
+	}
+    }
+
+for (aPos=0; aPos < aSize; ++aPos)
+    {
+    char aBase = aStart[aPos];
+    int *matRow = ss->matrix[(int)aBase];
+    int bestColScore = badScore;
+    int bestColPos = -1;
+    int colTop = bandCenter - maxInsert;
+    int colBottom = bandCenter + maxIns1;
+
+    if (colTop < 0) colTop = 0;
+    if (colBottom > bSize) colBottom = bSize;
+    curScoreOffset =  maxIns1 + colTop - (bandCenter - maxInsert);
+    prevScoreOffset = curScoreOffset + colShift;
+#ifdef DEBUG
+    uglyf("curScoreOffset %d, maxIns %d, prevScoreOffset %d\n", curScoreOffset, maxInsert, prevScoreOffset);
+#endif /* DEBUG */
+
+    /* At top we deal with possibility of initial insert that
+     * comes in at this point, unless the band has wandered off. */
+    if (aPos < maxInsert)
+	{
+	curScores[curScoreOffset-1].up = initGapScore;
+	initGapScore -= gapExtend;
+	}
+    else
+	curScores[curScoreOffset-1].up = badScore;
+
+    for (bPos = colTop; bPos < colBottom; ++bPos)
+	{
+	UBYTE parent;
+	struct score *curScore = &curScores[curScoreOffset];
+
+#ifdef DEBUG
+	uglyf("aPos %d, bPos %d, %c vs %c\n", aPos, bPos, aBase, bStart[bPos]);
+	uglyf("  diag[%d %d %d], left[%d %d %d], up[%d %d %d]\n", 
+		prevScores[prevScoreOffset-1].match, 
+		prevScores[prevScoreOffset-1].left, 
+		prevScores[prevScoreOffset-1].up, 
+		prevScores[prevScoreOffset].match, 
+		prevScores[prevScoreOffset].left, 
+		prevScores[prevScoreOffset].up, 
+		curScores[curScoreOffset-1].match, 
+		curScores[curScoreOffset-1].left, 
+		curScores[curScoreOffset-1].up);
+#endif /* DEBUG */
+
+	/* Handle ways into the matching state and record if it's
+	 * best score so far. */
+	    {
+	    int match = matRow[(int)bStart[bPos]];
+	    struct score *a = &prevScores[prevScoreOffset-1];
+	    int diagScore = a->match;
+	    int leftScore = a->left;
+	    int upScore = a->up;
+	    int score;
+	    if (diagScore >= leftScore && diagScore >= upScore)
+	        {
+		score = diagScore + match;
+		parent = mpMatch;
+		}
+	    else if (leftScore > upScore)
+	        {
+		score = leftScore + match;
+	        parent = mpLeft;
+		}
+	    else
+	        {
+	        score = upScore + match;
+	        parent = mpUp;
+	        }
+	    curScore->match = score;
+	    if (score > bestColScore)
+	        {
+		bestColScore = score;
+		bestColPos = bPos;
+		}
+	    }
+
+	/* Handle ways into left gap state. */
+	    {
+	    struct score *a = &prevScores[prevScoreOffset];
+	    int extScore = a->left - gapExtend;
+	    int openScore = a->match - gapOpen;
+	    if (extScore >= openScore)
+	        {
+		parent |= lpExt;
+		curScore->left = extScore;
+		}
+	    else
+	        {
+		curScore->left = openScore;
+		}
+	    }
+
+	/* Handle ways into up gap state. */
+	    {
+	    struct score *a = &curScore[-1];
+	    int extScore = a->up - gapExtend;
+	    int openScore = a->match - gapOpen;
+	    if (extScore >= openScore)
+	        {
+		parent |= upExt;
+		curScore->up = extScore;
+		}
+	    else
+	        {
+		curScore->up = openScore;
+		}
+	    }
+
+	parents[curScoreOffset - maxIns1][aPos] = parent;
+
+#ifdef DEBUG
+	uglyf(" cur [%d %d %d]\n", 
+		curScore->match, 
+		curScore->left, 
+		curScore->up);
+#endif /* DEBUG */
+	/* Advance to next row in column. */
+	curScoreOffset += 1;
+	prevScoreOffset += 1;
+	}
+
+
+    /* If this column's score is best so far make note of
+     * it and shift things so that the matching bases at the
+     * best score are centered in the band.  This allows the
+     * band to wander where appropriate.  Having the band shift
+     * to the best column position if it is not the best score
+     * so far doesn't work so well though.  */
+    if (bestScore < bestColScore)
+	{
+	bestScore = bestColScore;
+	aBestPos = aPos;
+	bBestPos = bestColPos;
+	colShift = (bestColPos + 1) - bandCenter;
+	}
+    else if (bestColScore < bestScore - maxDrop)
+	{
+	if (!global)
+	    break;
+	}
+    else
+	{
+	colShift = 1;
+	}
+
+    /* Keep track of vertical offset of this column, and
+     * set up offset of next column. */
+    bOffsets[aPos] = bandCenter;
+    bandCenter += colShift;
+
+    /* Swap scoring arrays so current becomes next iteration's previous. */
+    swapScores = curScores;
+    curScores = prevScores;
+    prevScores = swapScores;
+    }
+
+
+/* Trace back. */
+#ifdef DEBUG
+uglyf("traceback: bestScore %d, aBestPos %d, bBestPos %d\n", bestScore, aBestPos, bBestPos);
+#endif /* DEBUG */
+if (global || bestScore > 0)
+    {
+    boolean upState = FALSE;
+    boolean leftState = FALSE;
+    didExt = TRUE;
+    if (global)
+        {
+	aPos = aSize-1;
+	bPos = bSize-1;
+	}
+    else
+	{
+	aPos = aBestPos;
+	bPos = bBestPos;
+	}
+    for (;;)
+	{
+	int pOffset;
+	UBYTE parent;
+	pOffset = bPos - bOffsets[aPos] + maxInsert;
+	if (pOffset < 0) pOffset = 0;
+	// FIXME: there may be some cases near beginning where
+	// pOffset is not right.  Clamping it at 0 prevents a crash
+	// and produces correct behavior in many cases.  However
+	// I'm thinking a better fix would be to have bOffsets follow
+	// colTop rather than bandCenter somehow. Exactly how is
+	// beyond me at the moment.
+	if (pOffset >= bandSize)
+	    {
+#ifdef DEBUG
+	    uglyf("bPos %d, aPos %d, bOffsets[aPos] %d, maxInsert %d\n", bPos, aPos, bOffsets[aPos], maxInsert);
+	    uglyf("pOffset = %d, bandSize = %d\n", pOffset, bandSize);
+	    uglyf("aSize %d, bSize %d\n", aSize, bSize);
+	    faWriteNext(uglyOut, "uglyA", aStart, aSize);
+	    faWriteNext(uglyOut, "uglyB", bStart, bSize);
+#endif /* DEBUG */
+	    assert(global);
+	    return FALSE;
+	    }
+	parent = parents[pOffset][aPos];
+#ifdef DEBUG
+	uglyf("aPos %d, bPos %d, parent %d, pMask %d upState %d, leftState %d\n", aPos, bPos, parent, parent & mpMask, upState, leftState);
+#endif /* DEBUG */
+	if (upState)
+	    {
+	    retSymA[symCount] = '-';
+	    retSymB[symCount] = bStart[bPos];
+	    bPos -= 1;
+	    upState = (parent&upExt);
+	    }
+	else if (leftState)
+	    {
+	    retSymA[symCount] = aStart[aPos];
+	    retSymB[symCount] = '-';
+	    aPos -= 1;
+	    leftState = (parent&lpExt);
+	    }
+	else
+	    {
+	    retSymA[symCount] = aStart[aPos];
+	    retSymB[symCount] = bStart[bPos];
+	    aPos -= 1;
+	    bPos -= 1;
+	    parent &= mpMask;
+	    if (parent == mpUp)
+	        upState = TRUE;
+	    else if (parent == mpLeft)
+	        leftState = TRUE;
+	    }
+	if (++symCount >= symAlloc)
+	    errAbort("unsufficient symAlloc in bandExt");
+	if (aPos < 0 || bPos < 0)
+	    {
+	    while (aPos >= 0)
+		{
+		retSymA[symCount] = aStart[aPos];
+		retSymB[symCount] = '-';
+		if (++symCount >= symAlloc)
+		    errAbort("unsufficient symAlloc in bandExt");
+		--aPos;
+		}
+	    while (bPos >= 0)
+		{
+		retSymA[symCount] = '-';
+		retSymB[symCount] = bStart[bPos];
+		if (++symCount >= symAlloc)
+		    errAbort("unsufficient symAlloc in bandExt");
+		--bPos;
+		}
+	    break;
+	    }
+	}
+    if (dir > 0)
+	{
+	reverseBytes(retSymA, symCount);
+	reverseBytes(retSymB, symCount);
+	}
+    retSymA[symCount] = 0;
+    retSymB[symCount] = 0;
+    }
+else
+    {
+    retSymA[0] = retSymB[0] = 0;
+    }
+
+if (dir < 0)
+    {
+    reverseBytes(aStart, aSize);
+    reverseBytes(bStart, bSize);
+    }
+
+/* Clean up, set return values and go home */
+lmCleanup(&lm);
+if (retStartA != NULL) *retStartA = aBestPos;
+if (retStartB != NULL) *retStartB = bBestPos;
+*retSymCount = symCount;
+return didExt;
+}
+
+struct ffAli *bandExtFf(
+	struct lm *lm,	/* Local memory pool, NULL to use global allocation for ff */
+	struct axtScoreScheme *ss, 	/* Scoring scheme to use. */
+	int maxInsert,			/* Maximum number of inserts to allow. */
+	struct ffAli *origFf,		/* Alignment block to extend. */
+	char *nStart, char *nEnd,	/* Bounds of region to extend through */
+	char *hStart, char *hEnd,	/* Bounds of region to extend through */
+	int dir,			/* +1 to extend end, -1 to extend start */
+	int maxExt)			/* Maximum length of extension. */
+/* Extend a gapless alignment in one direction.  Returns extending
+ * ffAlis, not linked into origFf, or NULL if no extension possible. */
+{
+int symAlloc = 2*maxExt;
+char *symBuf = needMem(4*maxExt);
+char *nBuf = symBuf;
+char *hBuf = symBuf + symAlloc;
+char *ns, *hs;
+int symCount, nExt, hExt;
+int nSize, hSize;
+struct ffAli *ffList = NULL;
+boolean gotExt;
+
+if (dir > 0)
+    {
+    nSize = nEnd - origFf->nEnd;
+    hSize = hEnd - origFf->hEnd;
+    if (nSize > maxExt) nSize = maxExt;
+    if (hSize > maxExt) hSize = maxExt;
+    ns = origFf->nEnd;
+    hs = origFf->hEnd;
+    }
+else
+    {
+    nSize = origFf->nStart - nStart;
+    hSize = origFf->hStart - hStart;
+    if (nSize > maxExt) nSize = maxExt;
+    if (hSize > maxExt) hSize = maxExt;
+    ns = origFf->nStart - nSize;
+    hs = origFf->hStart - hSize;
+    }
+
+gotExt = bandExt(FALSE, ss, maxInsert, ns, nSize, hs, hSize, dir,
+	symAlloc, &symCount, nBuf, hBuf, &nExt, &hExt);
+if (gotExt)
+    {
+    char *nExtStart, *hExtStart;
+    if (dir > 0)
+	{
+	nExtStart = ns;
+	hExtStart = hs;
+	}
+    else
+	{
+	nExtStart = origFf->nStart - nExt - 1;
+	hExtStart = origFf->hStart - hExt - 1;
+	}
+    ffList = ffAliFromSym(symCount, nBuf, hBuf, lm,  nExtStart, hExtStart);
+    }
+freeMem(symBuf);
+return ffList;
+}
+
--- a/src/jkOwnLib/crudeali.c
+++ b/src/jkOwnLib/crudeali.c
--- a/src/jkOwnLib/ffAliHelp.c
+++ b/src/jkOwnLib/ffAliHelp.c
+/* ffAliHelp - Helper routines for things that produce (rather than just
+ * consume) ffAli type alignments. */
+/* Copyright 2000-2003 Jim Kent.  All rights reserved. */
+
+#include "common.h"
+#include "fuzzyFind.h"
+#include "dnaseq.h"
+
+
+void ffCat(struct ffAli **pA, struct ffAli **pB)
+/* Concatenate B to the end of A. Eat up second list
+ * in process. */
+{
+struct ffAli *a = *pA;
+struct ffAli *b = *pB;
+
+/* If list to add is empty our job is real easy. */
+if (b == NULL)
+    return;
+
+/* If list to add into is empty, then just switch in the
+ * second list. */
+if (a == NULL)
+    {
+    *pA = *pB;
+    *pB = NULL;
+    return;
+    }
+
+/* Neither list empty.  Find rightmost element of first list
+ * and cross-link it with leftmost element of second list. */
+while (a->right != NULL) a = a->right;
+b->left = a;
+a->right = b;
+*pB = NULL;
+}
+
+void ffAliSort(struct ffAli **pList, int (*compare )(const void *elem1,  const void *elem2))
+/* Sort a doubly linked list of ffAlis. */
+{
+/* Get head of list and handle easy special empty case. */
+struct ffAli *r = *pList;
+if (r == NULL) return;
+
+/* Since first pointer is "left", in order to reuse slSort, have
+ * to jump through some minor hoops. First go to right end of list,
+ * then sort it. */
+while (r->right) r = r->right;
+slSort(&r, compare);
+
+/* We're sorted, but our right links are all broken.  Fix this. */
+slReverse(&r);
+r = ffMakeRightLinks(r);
+*pList = r;
+}
+
+int ffCmpHitsHayFirst(const void *va, const void *vb)
+/* Compare function to sort hit array by ascending
+ * target offset followed by ascending query offset. */
+{
+const struct ffAli *a = *((struct ffAli **)va);
+const struct ffAli *b = *((struct ffAli **)vb);
+int diff;
+if ((diff = a->hStart - b->hStart) != 0)
+    return diff;
+return a->nStart - b->nStart;
+}
+
+int ffCmpHitsNeedleFirst(const void *va, const void *vb)
+/* Compare function to sort hit array by ascending
+ * query offset followed by ascending target offset. */
+{
+const struct ffAli *a = *((struct ffAli **)va);
+const struct ffAli *b = *((struct ffAli **)vb);
+int diff;
+if ((diff = a->nStart - b->nStart) != 0)
+    return diff;
+return a->hStart - b->hStart;
+}
+
+void ffExpandExactRight(struct ffAli *ali, DNA *needleEnd, DNA *hayEnd)
+/* Expand aligned segment to right as far as can exactly. */
+{
+DNA *nEnd = ali->nEnd;
+DNA *hEnd = ali->hEnd;
+while (nEnd < needleEnd && hEnd < hayEnd)
+    {
+    if (*nEnd != *hEnd)
+        break;
+    nEnd += 1;
+    hEnd += 1;
+    }
+ali->nEnd = nEnd;
+ali->hEnd = hEnd;
+return;
+}
+
+void ffExpandExactLeft(struct ffAli *ali, DNA *needleStart, DNA *hayStart)
+/* Expand aligned segment to left as far as can exactly. */
+{
+DNA *nStart = ali->nStart-1;
+DNA *hStart = ali->hStart-1;
+while (nStart >= needleStart && hStart >= hayStart)
+    {
+    if (*nStart != *hStart)
+        break;
+    nStart -= 1;
+    hStart -= 1;
+    }
+ali->nStart = nStart + 1;
+ali->hStart = hStart + 1;
+return;
+}
+
+struct ffAli *ffMergeClose(struct ffAli *aliList, 
+	DNA *needleStart, DNA *hayStart)
+/* Remove overlapping areas needle in alignment. Assumes ali is sorted on
+ * ascending nStart field. Also merge perfectly abutting neighbors or
+ * ones that could be merged at the expense of just a few mismatches.*/
+{
+struct ffAli *mid, *ali;
+int closeEnough = -3;
+
+if (aliList == NULL)
+    return NULL;
+for (mid = aliList->right; mid != NULL; mid = mid->right)
+    {
+    for (ali = aliList; ali != mid; ali = ali->right)
+	{
+	char *nStart, *nEnd;
+	int nOverlap;
+	nStart = max(ali->nStart, mid->nStart);
+	nEnd = min(ali->nEnd, mid->nStart);
+	nOverlap = nEnd - nStart;
+	/* Overlap or perfectly abut in needle, and needle/hay
+	 * offset the same. */
+	if (nOverlap >= closeEnough)
+	    {
+	    int aliDiag = (ali->nStart - needleStart) - (ali->hStart - hayStart);
+	    int midDiag = (mid->nStart - needleStart) - (mid->hStart - hayStart);
+	    if (aliDiag == midDiag)
+		{
+		/* Make mid encompass both, and make ali empty. */
+		mid->nStart = min(ali->nStart, mid->nStart);
+		mid->nEnd = max(ali->nEnd, mid->nEnd);
+		mid->hStart = min(ali->hStart, mid->hStart);
+		mid->hEnd = max(ali->hEnd, mid->hEnd);
+		ali->hStart = ali->hEnd = mid->hStart;
+		ali->nEnd = ali->nStart = mid->nStart;;
+		}
+	    }
+	}
+    }
+aliList = ffRemoveEmptyAlis(aliList, TRUE);
+return aliList;
+}
+
+
+int ffScoreIntron(DNA a, DNA b, DNA y, DNA z, int orientation)
+/* Return a better score the closer an intron is to
+ * consensus. */
+{
+int score = 0;
+int revScore = 0;
+
+if (orientation >= 0)
+    {
+    if (a == 'g' || a == 'G') ++score;
+    if (b == 't' || b == 'T') ++score;
+    if (y == 'a' || y == 'A') ++score;
+    if (z == 'g' || z == 'G') ++score;
+    }
+
+if (orientation <= 0)
+    {
+    if (a == 'c' || a == 'C') ++revScore;
+    if (b == 't' || b == 'T') ++revScore;
+    if (y == 'a' || y == 'A') ++revScore;
+    if (z == 'c' || z == 'C') ++revScore;
+    }
+
+return score > revScore ? score : revScore;
+}
+
+
+static int slideIntron(struct ffAli *left, struct ffAli *right, int orientation)
+/* Slides space between alignments if possible to match
+ * intron consensus better.  Returns how much it slid intron. */
+{
+DNA *nLeft = left->nEnd;
+DNA *hLeft = left->hEnd;
+DNA *nRight = right->nStart;
+DNA *hRight = right->hStart;
+DNA nl, nr, hl, hr;
+DNA *nLeftEnd = left->nStart;
+DNA *nRightEnd = right->nEnd;
+DNA *nBestLeft = NULL;
+int bestScore = -0x7fffffff;
+int curScore;
+int offset;
+
+if (hRight-hLeft < 4)   /* Too short to be an intron. */
+    return 0;
+if (nRight-nLeft > 2)   /* Too big of a gap to be an intron. */
+    return 0;
+
+/* Slide as far to the left as possible without inserting mismatches. */
+while (nLeft > nLeftEnd)
+    {
+    nl = nLeft[-1];
+    hl = hLeft[-1];
+    nr = nRight[-1];
+    hr = hRight[-1];
+    if (!(nl == 'n' && nr == 'n'))  /* N's in needle freely slide. */
+        {
+        if (nr != hr)
+            break;
+        }
+    nLeft -= 1;
+    hLeft -= 1;
+    nRight -= 1;
+    hRight -= 1;
+    }
+/* Slide as far to the right as possible computing
+   intron score as you go. */
+while (nRight < nRightEnd)
+    {
+    curScore = ffScoreIntron(hLeft[0], hLeft[1], hRight[-2], hRight[-1], orientation);
+    if (curScore > bestScore)
+        {
+        bestScore = curScore;
+        nBestLeft = nLeft;
+        }
+    nl = nLeft[0];
+    hl = hLeft[0];
+    if (nl != 'n' && nl != hl)
+        break;
+    nr = nRight[0];
+    hr = hRight[0];
+    nLeft += 1;
+    hLeft += 1;
+    nRight += 1;
+    hRight += 1;
+    }
+if (nBestLeft == NULL)
+    return 0;
+offset = nBestLeft - left->nEnd;
+if (offset == 0)
+    return offset;
+left->nEnd += offset;
+left->hEnd += offset;
+right->nStart += offset;
+right->hStart += offset;
+return offset;
+}
+
+
+boolean ffSlideOrientedIntrons(struct ffAli *ali, int orient)
+/* Slide introns (or spaces between aligned blocks)
+ * to match consensus on given strand. */
+{
+struct ffAli *left = ali, *right;
+boolean slid = FALSE;
+if (left == NULL)
+    return FALSE;
+while((right = left->right) != NULL)
+    {
+    if (slideIntron(left, right, orient))
+        slid = TRUE;
+    left = right;
+    }
+return slid;
+}
+
+boolean ffSlideIntrons(struct ffAli *ali)
+/* Slide introns (or spaces between aligned blocks)
+ * to match consensus.  Return TRUE if any slid. */
+{
+int orient = ffIntronOrientation(ali);
+return ffSlideOrientedIntrons(ali, orient);
+}
+
+struct ffAli *ffRemoveEmptyAlis(struct ffAli *ali, boolean doFree)
+/* Remove empty blocks from list. Optionally free empties too. */
+{
+struct ffAli *leftAli;
+struct ffAli *startAli;
+struct ffAli *rightAli;
+
+if (ali == NULL)
+    return NULL;
+
+/* Figure out left most non-empty ali. */
+while (ali->left)
+    ali = ali->left;
+while (ali)
+    {
+    /* If current ali is empty, chuck it out. */
+    if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart)
+	{
+	struct ffAli *empty = ali;
+        ali = ali->right;
+	if (doFree) freeMem(empty);
+	}
+    else
+        break;
+    }
+
+if (ali == NULL)
+    return NULL;
+
+ali->left = NULL;
+
+/* Get rid of empty middle alis. */
+startAli = leftAli = ali;
+ali = ali->right;
+while (ali)
+    {
+    rightAli = ali->right;
+    if (ali->nEnd <= ali->nStart || ali->hEnd <= ali->hStart)
+        {
+        leftAli->right = rightAli;
+        if (rightAli != NULL)
+            rightAli->left = leftAli;
+	if (doFree) freeMem(ali);
+        }
+    else
+        {
+        leftAli = ali;
+        }
+    ali = rightAli;
+    }
+return startAli;
+}
+
+struct ffAli *ffMergeHayOverlaps(struct ffAli *ali)
+/* Remove overlaps in haystack that perfectly abut in needle.
+ * These are transformed into perfectly abutting haystacks
+ * that have a gap in the needle. */
+{
+struct ffAli *a = NULL;
+struct ffAli *leftA = NULL;
+
+if (ali == NULL)
+    return NULL;
+a = ali;
+for (;;)
+    {
+    int nOverlap;
+    int hOverlap;
+    int aSize;
+
+    /* Advance to next ali */
+    leftA = a;
+    a = a->right;
+    if (a == NULL)
+        break;
+
+    nOverlap = leftA->nEnd - a->nStart;
+    hOverlap = leftA->hEnd - a->hStart;
+    aSize = a->nEnd - a->nStart;
+    if (hOverlap > 0 && hOverlap < aSize && nOverlap <= 0)
+        {
+        a->hStart += hOverlap;
+        a->nStart += hOverlap;
+        }
+    }
+return ali;
+}
+
+struct ffAli *ffMergeNeedleAlis(struct ffAli *ali, boolean doFree)
+/* Remove overlapping areas needle in alignment. Assumes ali is sorted on
+ * ascending nStart field. Also merge perfectly abutting neighbors.*/
+{
+struct ffAli *a = NULL;
+struct ffAli *leftA = NULL;
+struct ffAli *rightA;
+
+if (ali == NULL)
+    return NULL;
+rightA = ali;
+for (;;)
+    {
+    /* Advance to next ali */
+    leftA = a;
+    a = rightA;
+    if (a == NULL)
+        break;
+    rightA = a->right;
+    
+
+    /* See if can merge current alignment into left one. */
+    if (leftA != NULL)
+        {
+        int overlap = leftA->nEnd - a->nStart;
+
+        /* Deal with overlaps in needle */
+        if (overlap > 0)
+            {
+            /* See if left encompasses current segment. */
+            if (leftA->nStart <= a->nStart && leftA->nEnd >= a->nEnd)
+                {
+                /* Eliminate current segment. */
+                leftA->right = rightA;
+                if (rightA != NULL)
+                    rightA->left = leftA;
+		if (doFree) freeMem(a);
+                a = leftA;
+                }
+            else
+                {
+                /* Remove overlapping area from current segment, leave
+                 * it in left segment. */
+                a->hStart += overlap;
+                a->nStart += overlap;
+                }
+            }
+        else if (overlap == 0 && leftA->hEnd == a->hStart)
+            {
+            /* Remove current segment from list. */
+            leftA->right = rightA;
+            if (rightA != NULL)
+                rightA->left = leftA;
+            /* Fold data from current segment into left segment */
+            leftA->nEnd = a->nEnd;
+            leftA->hEnd = a->hEnd;
+	    if (doFree) freeMem(a); 
+	    a = leftA;
+            }
+        }
+    }
+return ali;
+}
+
--- a/src/jkOwnLib/ffSeedExtend.c
+++ b/src/jkOwnLib/ffSeedExtend.c
--- a/src/jkOwnLib/fuzzyFind.c
+++ b/src/jkOwnLib/fuzzyFind.c
--- a/src/jkOwnLib/genoFind.c
+++ b/src/jkOwnLib/genoFind.c
--- a/src/jkOwnLib/gfBlatLib.c
+++ b/src/jkOwnLib/gfBlatLib.c
--- a/src/jkOwnLib/gfClientLib.c
+++ b/src/jkOwnLib/gfClientLib.c
+/* gfClientLib - stuff that both blat and pcr clients of
+ * genoFind use. */
+/* Copyright 2001-2005 Jim Kent.  All rights reserved. */
+
+#include "common.h"
+#include "hash.h"
+#include "linefile.h"
+#include "obscure.h"
+#include "dnaseq.h"
+#include "fa.h"
+#include "nib.h"
+#include "twoBit.h"
+#include "repMask.h"
+#include "gfClientLib.h"
+
+void gfClientFileArray(char *fileName, char ***retFiles, int *retFileCount)
+/* Check if file if .2bit or .nib or .fa.  If so return just that
+ * file in a list of one.  Otherwise read all file and treat file
+ * as a list of filenames.  */
+{
+boolean gotSingle = FALSE;
+char *buf;		/* This will leak memory but won't matter. */
+
+if (nibIsFile(fileName) || twoBitIsSpec(fileName)
+    || sameString(fileName, "stdin")
+    || endsWith(fileName, ".Z")
+    || endsWith(fileName, ".gz")
+    || endsWith(fileName, ".bz2"))
+    gotSingle = TRUE;
+/* Detect .fa files (where suffix is not standardized)
+ * by first character being a '>'. */
+else
+    {
+    FILE *f = mustOpen(fileName, "r");
+    char c = fgetc(f);
+    fclose(f);
+    if (c == '>')
+        gotSingle = TRUE;
+    }
+if (gotSingle)
+    {
+    char **files;
+    *retFiles = AllocArray(files, 1);
+    files[0] = cloneString(fileName);
+    *retFileCount = 1;
+    return;
+    }
+else
+    {
+    readAllWords(fileName, retFiles, retFileCount, &buf);
+    }
+}
+
+
+void gfClientUnmask(struct dnaSeq *seqList)
+/* Unmask all sequences. */
+{
+struct dnaSeq *seq;
+for (seq = seqList; seq != NULL; seq = seq->next)
+    faToDna(seq->dna, seq->size);
+}
+
+static void maskFromOut(struct dnaSeq *seqList, char *outFile, float minRepDivergence)
+/* Mask DNA sequence by putting bits more than 85% identical to
+ * repeats as defined in RepeatMasker .out file to upper case. */
+{
+struct lineFile *lf = lineFileOpen(outFile, TRUE);
+struct hash *hash = newHash(0);
+struct dnaSeq *seq;
+char *line;
+
+for (seq = seqList; seq != NULL; seq = seq->next)
+    hashAdd(hash, seq->name, seq);
+if (!lineFileNext(lf, &line, NULL))
+    errAbort("Empty mask file %s\n", lf->fileName);
+if (!startsWith("There were no", line))	/* No repeats is ok. Not much work. */
+    {
+    if (!startsWith("   SW", line))
+	errAbort("%s isn't a RepeatMasker .out file.", lf->fileName);
+    if (!lineFileNext(lf, &line, NULL) || !startsWith("score", line))
+	errAbort("%s isn't a RepeatMasker .out file.", lf->fileName);
+    lineFileNext(lf, &line, NULL);  /* Blank line. */
+    while (lineFileNext(lf, &line, NULL))
+	{
+	char *words[32];
+	struct repeatMaskOut rmo;
+	int wordCount;
+	int seqSize;
+	int repSize;
+	wordCount = chopLine(line, words);
+	if (wordCount < 14)
+	    errAbort("%s line %d - error in repeat mask .out file\n", lf->fileName, lf->lineIx);
+	repeatMaskOutStaticLoad(words, &rmo);
+	/* If repeat is more than 15% divergent don't worry about it. */
+	if (rmo.percDiv + rmo.percDel + rmo.percInc <= minRepDivergence)
+	    {
+	    if((seq = hashFindVal(hash, rmo.qName)) == NULL)
+		errAbort("%s is in %s but not corresponding sequence file, files out of sync?\n", 
+			rmo.qName, lf->fileName);
+	    seqSize = seq->size;
+	    if (rmo.qStart <= 0 || rmo.qStart > seqSize || rmo.qEnd <= 0 
+	    	|| rmo.qEnd > seqSize || rmo.qStart > rmo.qEnd)
+		{
+		warn("Repeat mask sequence out of range (%d-%d of %d in %s)\n",
+		    rmo.qStart, rmo.qEnd, seqSize, rmo.qName);
+		if (rmo.qStart <= 0)
+		    rmo.qStart = 1;
+		if (rmo.qEnd > seqSize)
+		    rmo.qEnd = seqSize;
+		}
+	    repSize = rmo.qEnd - rmo.qStart + 1;
+	    if (repSize > 0)
+		toUpperN(seq->dna + rmo.qStart - 1, repSize);
+	    }
+	}
+    }
+freeHash(&hash);
+lineFileClose(&lf);
+}
+
+static void maskNucSeqList(struct dnaSeq *seqList, char *seqFileName, char *maskType,
+	boolean hardMask, float minRepDivergence)
+/* Apply masking to simple nucleotide sequence by making masked nucleotides
+ * upper case (since normal DNA sequence is lower case for us). */
+{
+struct dnaSeq *seq;
+char *outFile = NULL, outNameBuf[512];
+
+if (sameWord(maskType, "upper"))
+    {
+    /* Already has dna to be masked in upper case. */
+    }
+else if (sameWord(maskType, "lower"))
+    {
+    for (seq = seqList; seq != NULL; seq = seq->next)
+	toggleCase(seq->dna, seq->size);
+    }
+else
+    {
+    /* Masking from a RepeatMasker .out file. */
+    if (sameWord(maskType, "out"))
+	{
+	sprintf(outNameBuf, "%s.out", seqFileName);
+	outFile = outNameBuf;
+	}
+    else
+	{
+	outFile = maskType;
+	}
+    gfClientUnmask(seqList);
+    maskFromOut(seqList, outFile, minRepDivergence);
+    }
+if (hardMask)
+    {
+    for (seq = seqList; seq != NULL; seq = seq->next)
+	upperToN(seq->dna, seq->size);
+    }
+}
+
+bioSeq *gfClientSeqList(int fileCount, char *files[], 
+	boolean isProt, boolean isTransDna, char *maskType, 
+	float minRepDivergence, boolean showStatus)
+/* From an array of .fa and .nib file names, create a
+ * list of dnaSeqs, which are set up so that upper case is masked and lower case
+ * is unmasked sequence. (This is the opposite of the input, but set so that
+ * we can use lower case as our primary DNA sequence, which historically predates
+ * our use of lower case masking.)  Protein sequence on the other hand is
+ * all upper case. */
+
+{
+int i;
+char *fileName;
+bioSeq *seqList = NULL, *seq;
+boolean doMask = (maskType != NULL);
+
+for (i=0; i<fileCount; ++i)
+    {
+    struct dnaSeq *list = NULL, sseq;
+    ZeroVar(&sseq);
+    fileName = files[i];
+    if (nibIsFile(fileName))
+	list = nibLoadAllMasked(NIB_MASK_MIXED|NIB_BASE_NAME, fileName);
+    else if (twoBitIsSpec(fileName))
+	list = twoBitLoadAll(fileName);
+    else if (isProt)
+      list = faReadAllPep(fileName);
+    else
+      list = faReadAllMixed(fileName);
+
+    /* If necessary mask sequence from file. */
+    if (doMask)
+	{
+	maskNucSeqList(list, fileName, maskType, isTransDna, minRepDivergence);
+	}
+    else
+        {
+	/* If not masking send everything to proper case here. */
+	for (seq = list; seq != NULL; seq = seq->next)
+	    {
+	    if (isProt)
+		faToProtein(seq->dna, seq->size);
+	    else
+		faToDna(seq->dna, seq->size);
+	    }
+	}
+    /* Move local list to end of bigger list. */
+    seqList = slCat(seqList, list);
+    }
+if (showStatus)
+    {
+    /* Total up size and sequence count and report. */
+    int count = 0; 
+    unsigned long totalSize = 0;
+    for (seq = seqList; seq != NULL; seq = seq->next)
+        {
+	totalSize += seq->size;
+	count += 1;
+	}
+    printf("Loaded %lu letters in %d sequences\n", totalSize, count);
+    }
+return seqList;
+}
+
--- a/src/jkOwnLib/gfInternal.c
+++ b/src/jkOwnLib/gfInternal.c
+/* Some stuff shared by gfClient and gfPcr. 
+ * Copyright 2001-2004 Jim Kent. All rights reserved. */
+
+#include "common.h"
+#include "linefile.h"
+#include "dnaseq.h"
+#include "genoFind.h"
+#include "gfInternal.h"
+#include "errAbort.h"
+#include "nib.h"
+#include "twoBit.h"
+
+
+
+static int extendRespect(int oldX, int newX)
+/* Return newX modified slightly so as to be in same frame as oldX. */
+{
+int frame = oldX % 3;
+newX = newX - (newX % 3) + frame;
+return newX;
+}
+
+void gfiExpandRange(struct gfRange *range, int qSize, int tSize, 
+	boolean respectFrame, boolean isRc, int expansion)
+/* Expand range to cover an additional 500 bases on either side. */
+{
+int x;
+
+x = range->qStart - expansion;
+if (x < 0) x = 0;
+range->qStart = x;
+
+x = range->qEnd + expansion;
+if (x > qSize) x = qSize;
+range->qEnd = x;
+
+x = range->tStart - expansion;
+if (x < 0) x = 0;
+if (respectFrame && !isRc) 
+    {
+    x = extendRespect(range->tStart, x);
+    }
+range->tStart = x;
+
+x = range->tEnd + expansion;
+if (x > tSize) x = tSize;
+if (respectFrame && isRc)
+    {
+    x = extendRespect(range->tEnd, x);
+    if (x > tSize)
+        x -= 3;
+    }
+range->tEnd = x;
+}
+
+struct dnaSeq *gfiExpandAndLoadCached(struct gfRange *range, 
+	struct hash *tFileCache, char *tSeqDir, int querySize, 
+	int *retTotalSeqSize, boolean respectFrame, boolean isRc, int expansion)
+/* Expand range to cover an additional expansion bases on either side.
+ * Load up target sequence and return. (Done together because don't
+ * know target size before loading.) */
+{
+struct dnaSeq *target = NULL;
+char fileName[PATH_LEN+256];
+
+safef(fileName, sizeof(fileName), "%s/%s", tSeqDir, range->tName);
+if (nibIsFile(fileName))
+    {
+    struct nibInfo *nib = hashFindVal(tFileCache, fileName);
+    if (nib == NULL)
+        {
+	nib = nibInfoNew(fileName);
+	hashAdd(tFileCache, fileName, nib);
+	}
+    if (isRc)
+	reverseIntRange(&range->tStart, &range->tEnd, nib->size);
+    gfiExpandRange(range, querySize, nib->size, respectFrame, isRc, expansion);
+    target = nibLdPart(fileName, nib->f, nib->size, 
+    	range->tStart, range->tEnd - range->tStart);
+    if (isRc)
+	{
+	reverseComplement(target->dna, target->size);
+	reverseIntRange(&range->tStart, &range->tEnd, nib->size);
+	}
+    *retTotalSeqSize = nib->size;
+    }
+else
+    {
+    struct twoBitFile *tbf = NULL;
+    // split a filename+chrom like "/gbdb/hg19/hg19.2bit:chr1"
+    // (can also be URL like "http://hgdownload.soe.ucsc.edu/gbdb/hg19/hg19.2bit:chr1")
+    // into filename and tSeqName
+    char *tSeqName = strrchr(fileName, ':');
+    int tSeqSize = 0;
+    if (tSeqName == NULL)
+        errAbort("No colon in .2bit response from gfServer");
+    *tSeqName++ = 0;
+    tbf = hashFindVal(tFileCache, fileName);
+    if (tbf == NULL)
+        {
+	tbf = twoBitOpen(fileName);
+	hashAdd(tFileCache, fileName, tbf);
+	}
+    tSeqSize = twoBitSeqSize(tbf, tSeqName);
+    if (isRc)
+	reverseIntRange(&range->tStart, &range->tEnd, tSeqSize);
+    gfiExpandRange(range, querySize, tSeqSize, respectFrame, isRc, expansion);
+    target = twoBitReadSeqFragLower(tbf, tSeqName, range->tStart, range->tEnd);
+    if (isRc)
+	{
+	reverseComplement(target->dna, target->size);
+	reverseIntRange(&range->tStart, &range->tEnd, tSeqSize);
+	}
+    *retTotalSeqSize = tSeqSize;
+    }
+return target;
+}
+
+void gfiGetSeqName(char *spec, char *name, char *file)
+/* Extract sequence name and optionally file name from spec, 
+ * which includes nib and 2bit files.  (The file may be NULL
+ * if you don't care.) */
+{
+if (nibIsFile(spec))
+    {
+    splitPath(spec, NULL, name, NULL);
+    if (file != NULL)
+        strcpy(file, spec);
+    }
+else
+    {
+    char *s = strchr(spec, ':');
+    if (s == NULL)
+	errAbort("Expecting colon in %s", spec);
+    strcpy(name, s+1);
+    if (file != NULL)
+        {
+	int fileNameSize = s - spec;
+	memcpy(file, spec, fileNameSize);
+	file[fileNameSize] = 0;
+	}
+    }
+}
+
--- a/src/jkOwnLib/gfInternal.h
+++ b/src/jkOwnLib/gfInternal.h
--- a/src/jkOwnLib/gfOut.c
+++ b/src/jkOwnLib/gfOut.c
--- a/src/jkOwnLib/gfPcrLib.c
+++ b/src/jkOwnLib/gfPcrLib.c
--- a/src/jkOwnLib/gfWebLib.c
+++ b/src/jkOwnLib/gfWebLib.c
--- a/src/jkOwnLib/makefile
+++ b/src/jkOwnLib/makefile
+kentSrc = ..
+include ../inc/common.mk
+
+O = bandExt.o crudeali.o ffAliHelp.o ffSeedExtend.o fuzzyFind.o \
+    genoFind.o gfBlatLib.o gfClientLib.o gfInternal.o gfOut.o gfPcrLib.o gfWebLib.o ooc.o \
+    patSpace.o splix.o supStitch.o trans3.o xenbig.o xensmall.o 
+
+T = ../lib/$(MACHTYPE)/jkOwnLib.a
+
+$(T): $(O) ../lib/$(MACHTYPE)
+	ar rcus $(T) $(O)
+
+../lib/$(MACHTYPE):
+	mkdir ../lib/$(MACHTYPE)
+
+clean:
+	rm -f ${O} ../lib/$(MACHTYPE)/jkOwnLib.a
+
+tags:
+	etags ../inc/*.h ../lib/*.h ../lib/*.c  ../hg/inc/*.h ../hg/lib/*.h ../hg/lib/*.c ../hg/hgTracks/hgTracks.c ../hg/hgc/hgc.c ../hg/hgTrackUi/hgTrackUi.c
--- a/src/jkOwnLib/ooc.c
+++ b/src/jkOwnLib/ooc.c
--- a/src/jkOwnLib/patSpace.c
+++ b/src/jkOwnLib/patSpace.c