Commit 08c3744e authored by Steffen Möller's avatar Steffen Möller

New upstream version 1.20

parents
>ARMTM40TR
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 19 00 00 00 00
00 00 15 15 17 00 00 00 00 00 00 00 16 21 25 28 21
17 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 15 00 00 00 00 17 23 24 24 24 24 20 20 33 33 33
33 33 33 33 33 33 40 40 36 36 36 33 33 33 33 33 33
37 37 35 35 35 35 36 36 36 36 37 42 40 40 33 33 33
33 33 35 37 40 40 45 40 40 36 36 36 36 38 38 49 49
43 43 33 33 33 33 33 33 42 37 37 37 45 40 41 37 45
37 37 37 34 34 42 35 35 34 34 34 34 40 40 40 49 49
45 33 33 33 33 33 33 41 45 45 45 45 45 45 45 49 49
49 49 49 45 45 45 45 45 45 41 41 41 45 41 37 37 37
37 40 40 40 42 37 41 51 45 45 49 45 45 45 45 45 45
45 45 45 51 51 51 51 51 51 51 51 51 45 45 45 49 49
40 39 42 40 39 39 39 41 41 39 39 39 39 39 39 45 51
51 51 51 51 45 45 41 41 41 51 51 51 51 51 51 51 45
45 45 40 40 40 45 45 43 51 43 45 45 43 43 51 51 51
45 41 41 41 51 51 51 51 51 39 39 39 39 39 40 45 41
41 51 51 51 45 45 51 51 45 45 45 45 45 45 45 51 51
51 51 51 51 51 51 51 41 41 41 41 41 45 51 51 51 51
51 45 45 45 45 40 40 49 49 49 45 51 51 51 41 41 41
41 43 45 45 45 37 37 37 41 39 39 40 40 39 45 45 45
45 51 45 41 37 36 30 30 30 34 38 40 40 49 51 51 51
51 51 51 39 39 40 40 39 38 38 42 42 42 37 37 45 51
51 51 51 51 51 51 51 51 51 51 51 51 51 51 45 40 40
40 37 37 37 45 45 45 49 49 45 45 45 45 45 45 37 37
45 37 37 37 45 45 45 45 45 45 45 45 45 45 40 40 42
35 35 35 34 37 37 40 40 37 37 41 41 41 44 45 45 45
45 37 31 31 31 31 34 34 37 41 41 31 31 28 32 32 34
35 35 35 35 32 32 24 32 32 37 34 34 34 34 34 34 34
34 34 35 38 34 34 34 38 37 37 37 37 37 29 29 27 34
27 33 30 33 33 33 40 45 49 45 38 35 27 24 24 32 32
32 32 26 29 28 32 00 00 00 00 00 00 18 29 34 34 29
31 23 18 00 00 18 23 31 34 33 28 30 30 30 24 31 34
34 32 32 26 26 29 31 25 26 00 00 00 25 32 36 32 19
00 00 00 00 00 20 23 31 30 25 24 24 29 30 30 24 18
16 00 00 00 25 25 25 00 00 00 20 18 15 00 00 00 00
00 18 17 25 15 00 00 00 00 17 15 00 00 00 17 00 00
00 32 32 29 29 20 25 00 00 00 20 15 25 20 20 20 18
00 00 00 00 00 00 00 00 00 00 00 00 00 00 16 18 15
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 17 20 25 00 00 00 00 00 00 00 00 00 00 00
00 00 00 15 00 00 00 00 00 00 00 16 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 15 15 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00 00 00 00 00 00 00
>ARMTM40TR
TTAGTCTGAAGNTGTTGAACCCAATATCGTTGCGACGCAGCGGTGGCGTGGGGTCGNTAA
TATTAAATGATTAGAATCATAATTTTGAAAAAGACTCTTATAAATAAGAAATTATGGTCA
TAAAAAATAAATCCCCCTACGACACCNCTGCTCTAATACGANTCACTATAAGGAAAGCTC
GTACGCCTGCAGGCTTAGGATACGCCAAGCTCTAATACGACTCACTATAGGGAAAGCTGG
TACGCCTGCAGGTACCGGTCCGGAATTCCCGGGTCGACCCACGCGTCCGGGCGCACGCCG
GTAGGATCTGCCTCCCGAGCTCTGGGGTGAAATGTGTGTGCCACCTCTTCCATCCCCAAA
TGGTGTTCTTTTAAGGCCAGAGGGTGTGTTGTCATATAGAAATCCCACATTCCAGGTCTG
ATGTTTCCTCCTTTAGCCCACCGGCTTTCAAGGGCAATTTTATATTTTGTGCTGATGTGG
CATTTCCCTCCTGCCTGTAGTGTGCCTCCTTCCACTGCCACCTGAAGGGAGGGCCGAACC
AGCTTGGAGGAGGGGAAGGGGCTCCCCAAAGAAGCAGCATCACATTGGATAGGGGCTGAG
GGCTCTGGGTGGGATGGCCTGCTGGACTGACCATGGCAGGCCAGCCAGGCTGGGCACTGA
GTTGTCAGAATAATTGTGCACAGCTTAGAGGAGCCGGGCTGCCCCATGCTAAGCATGGCA
TCTGTCTTTCTCCATCTCTTTCCCTTTCTGTCTTTGTTTTGGTTTCCCTGCTTCTGTTCT
GTGGGGTCTTCATTCTCTCGTTGGTCAATGTGCACTGGAGCAGAGACCAGACCTCAAGCT
ATGACAGGGAGGGCTTTGACAGAAAAAGGCCCTCCTTTGGGCTGAGGTGTGAAAAGACTG
GGTACAGGGGGTGGCCCACGGTAGTCTTCCTCCATAAAATGAGGGAGATGAAAGGTTTAA
ATTGGGGTTTTCATCC
Copyright 1995-2004 The Institute for Genomic Research. All rights
reserved.
This software is provided "AS IS". The Institute for Genomic Research
makes no representation or warranty, express or implied, including
without limitation any warranties of merchantability or fitness for a
particular purpose, associated with the receipt or use of this
software.
This software program may not be sold, leased, transferred, exported
or otherwise disclosed to anyone, in whole or in part, without written
consent of The Institute for Genomic Research.
This diff is collapsed.
The Artistic License
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to
make reasonable modifications.
Definitions:
* "Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
* "Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes of
the Copyright Holder.
* "Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
* "You" is you, if you're thinking about copying or distributing
this Package.
* "Reasonable copying fee" is whatever you can justify on the
basis of media cost, duplication charges, time of people
involved, and so on. (You will not be required to justify it to
the Copyright Holder, but only to the computing community at
large as a market that must bear the fee.)
* "Freely Available" means that no fee is charged for the item
itself, though there may be fees involved in handling the
item. It also means that recipients of the item may redistribute
it under the same conditions they received it.
1. You may make and give away verbatim copies of the source form of
the Standard Version of this Package without restriction, provided
that you duplicate all of the original copyright notices and
associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain or from the Copyright Holder. A
Package modified in such a way shall still be considered the
Standard Version.
3. You may otherwise modify your copy of this Package in any way,
provided that you insert a prominent notice in each changed file
stating how and when you changed that file, and provided that you
do at least ONE of the following:
a) place your modifications in the Public Domain or otherwise make
them Freely Available, such as by posting said modifications to
Usenet or an equivalent medium, or placing the modifications on a
major archive site such as ftp.uu.net, or by allowing the
Copyright Holder to include your modifications in the Standard
Version of the Package.
b) use the modified Package only within your corporation or
organization.
c) rename any non-standard executables so the names do not
conflict with standard executables, which must also be provided,
and provide a separate manual page for each non-standard
executable that clearly documents how it differs from the Standard
Version.
d) make other distribution arrangements with the Copyright Holder.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the
following:
a) distribute a Standard Version of the executables and library
files, together with instructions (in the manual page or
equivalent) on where to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) accompany any non-standard executables with their corresponding
Standard Version executables, giving the non-standard executables
non-standard names, and clearly documenting the differences in
manual pages (or equivalent), together with instructions on where
to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
5. You may charge a reasonable copying fee for any distribution of
this Package. You may charge any fee you choose for support of this
Package. You may not charge a fee for this Package itself. However,
you may distribute this Package in aggregate with other (possibly
commercial) programs as part of a larger (possibly commercial)
software distribution provided that you do not advertise this
Package as a product of your own.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whomever
generated them, and may be sold commercially, and may be aggregated
with this Package.
7. C or perl subroutines supplied by you and linked into this Package
shall not be considered part of this Package.
8. The name of the Copyright Holder may not be used to endorse or
promote products derived from this software without specific prior
written permission.
9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES
OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End
This license is approved by the Open Source Initiative
(www.opensource.org) for certifying software as OSI Certified Open
Source.
# Build description for the lucy executable (variant linking libpthread).
# Compiler, optimisation flags and extra link libraries.
CC = cc
CFLAGS = -O
LIBS = -lpthread
# Object files linked into the lucy executable.
OBJS = lucy.o qual_trim.o abi.o vector.o splice.o poly.o
# Default target: build lucy, then print a completion message.
all: lucy beep
# Remove any previous executable, then relink it from the object files.
lucy: $(OBJS)
rm -f $@
$(CC) $(CFLAGS) -o $@ $(OBJS) $(LIBS)
# Prints "done!" without echoing the command itself.
beep:
@echo done!
# Delete the object files and the executable.
clean:
rm -f $(OBJS) lucy
>PUC19 pUC-JK BstXI Cloning Vector (high copy number plasmid derived from pUC19)
tcgcgcgtttcggtgatgacggtgaaaacctctgacacatgcagctcccggagacggtca
cagcttgtctgtaagcggatgccgggagcagacaagcccgtcagggcgcgtcagcgggtg
ttggcgggtgtcggggctggcttaactatgcggcatcagagcagattgtactgagagtgc
accatatgcggtgtgaaataccgcacagatgcgtaaggagaaaataccgcatcaggcgcc
attcgccattcaggctgcgcaactgttgggaagggcgatcggtgcgggcctcttcgctat
tacgccagctggcgaaagggggatgtgctgcaaggcgattaagttgggtaacgccagggt
tttcccagtcacgacgttgtaaaacgacggccagtgaattcccaatgtgctgggtacccg
ggatcctgcaggtcgaccagcacactggcaagcttggcgtaatcatggtcatagctgttt
cctgtgtgaaattgttatccgctcacaattccacacaacatacgagccggaagcataaag
tgtaaagcctggggtgcctaatgagtgagctaactcacattaattgcgttgcgctcactg
cccgctttccagtcgggaaacctgtcgtgccagctgcattaatgaatcggccaacgcgcg
gggagaggcggtttgcgtattgggcgctcttccgcttcctcgctcactgactcgctgcgc
tcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatcc
acagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccagg
aaccgtaaaaaggccgcgttgctggcgtttttccataggctccgcccccctgacgagcat
cacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccag
gcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccgga
tacctgtccgcctttctcccttcgggaagcgtggcgctttctcaatgctcacgctgtagg
tatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgtt
cagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacac
gacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggc
ggtgctacagagttcttgaagtggtggcctaactacggctacactagaaggacagtattt
ggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatcc
ggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgc
agaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtgg
aacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctag
atccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttgg
tctgacagttaccaatgcttaatcagtgaggcacctatctcagcgatctgtctatttcgt
tcatccatagttgcctgactccccgtcgtgtagataactacgatacgggagggcttacca
tctggccccagtgctgcaatgataccgcgagacccacgctcaccggctccagatttatca
gcaataaaccagccagccggaagggccgagcgcagaagtggtcctgcaactttatccgcc
tccatccagtctattaattgttgccgggaagctagagtaagtagttcgccagttaatagt
ttgcgcaacgttgttgccattgctacaggcatcgtggtgtcacgctcgtcgtttggtatg
gcttcattcagctccggttcccaacgatcaaggcgagttacatgatcccccatgttgtgc
aaaaaagcggttagctccttcggtcctccgatcgttgtcagaagtaagttggccgcagtg
ttatcactcatggttatggcagcactgcataattctcttactgtcatgccatccgtaaga
tgcttttctgtgactggtgagtactcaaccaagtcattctgagaatagtgtatgcggcga
ccgagttgctcttgcccggcgtcaatacgggataataccgcgccacatagcagaacttta
aaagtgctcatcattggaaaacgttcttcggggcgaaaactctcaaggatcttaccgctg
ttgagatccagttcgatgtaacccactcgtgcacccaactgatcttcagcatcttttact
ttcaccagcgtttctgggtgagcaaaaacaggaaggcaaaatgccgcaaaaaagggaata
agggcgacacggaaatgttgaatactcatactcttcctttttcaatattattgaagcatt
tatcagggttattgtctcatgagcggatacatatttgaatgtatttagaaaaataaacaa
ataggggttccgcgcacatttccccgaaaagtgccacctgacgtctaagaaaccattatt
atcatgacattaacctataaaaataggcgtatcacgaggccctttcgtc
>PUC19.for.begin
gcgtaaggagaaaataccgcatcaggcgccattcgccattcaggctgcgcaactgttgggaagggcgatc
ggtgcgggcctcttcgctattacgccagctggcgaaagggggatgtgctgcaaggcgattaagttgggta
acgccagggttttcccagtcacgacgttgtaaaacgacggccagtgaattcccaatgtgCTGGAAAGGG
>PUC19.for.end
CCCTTTCCAGCACActgggtacccgggatcctgcaggtcgaccagcacaCTGGcaagcttggcgtaatca
tggtcatagctgtttcctgtgtgaaattgttatccgctcacaattccacacaacatacgagccggaagca
taaagtgtaaagcctggggtgcctaatgagtgagctaactcacattaattgcgttgcgctcactgcccgc
>PUC19.rev.begin
gcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggct
ttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcac
acaggaaacagctatgaccatgattacgccaagcttgCCAGtgtgctggtcgacctgcag
gatcccgggtacccagTGTGCTGGAAAGGG
>PUC19.rev.end
CCCTTTCCAGcacattgggaattcactggccgtcgttttacaacgtcgtgactgggaaaa
ccctggcgttacccaacttaatcgccttgcagcacatccccctttcgccagctggcgtaa
tagcgaagaggcccgcaccgatcgcccttcccaacagttgcgcagcctgaatggcgaatg
gcgcctgatgcggtattttctccttacgc
>PUC19.for.begin
gcgtaaggagaaaataccgcatcaggcgccattcgccattcaggctgcgcaactgttgggaagggcgatc
ggtgcgggcctcttcgctattacgccagctggcgaaagggggatgtgctgcaaggcgattaagttgggta
acgccagggttttcccagtcacgacgttgtaaaacgacggccagtgaattcccaatgtgCTGGAAAGGG
>PUC19.for.end
CCCTTTCCAGCACActgggtacccgggatcctgcaggtcgaccagcacaCTGGcaagcttggcgtaatca
tggtcatagctgtttcctgtgtgaaattgttatccgctcacaattccacacaacatacgagccggaagca
taaagtgtaaagcctggggtgcctaatgagtgagctaactcacattaattgcgttgcgctcactgcccgc
>PUC19.rev.begin
gcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggct
ttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcac
acaggaaacagctatgaccatgattacgccaagcttgCCAGtgtgctggtcgacctgcag
gatcccgggtacccagTGTGCTGGAAAGGG
>PUC19.rev.end
CCCTTTCCAGcacattgggaattcactggccgtcgttttacaacgtcgtgactgggaaaa
ccctggcgttacccaacttaatcgccttgcagcacatccccctttcgccagctggcgtaa
tagcgaagaggcccgcaccgatcgcccttcccaacagttgcgcagcctgaatggcgaatg
gcgcctgatgcggtattttctccttacgc
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Build description for the lucy executable (variant with no extra link
# libraries: LIBS is empty).
CC = cc
CFLAGS = -O
LIBS =
# Object files linked into the lucy executable.
OBJS = lucy.o qual_trim.o abi.o vector.o splice.o poly.o
# Default target: build lucy, then print a completion message.
all: lucy beep
# Remove any previous executable, then relink it from the object files.
lucy: $(OBJS)
rm -f $@
$(CC) $(CFLAGS) -o $@ $(OBJS) $(LIBS)
# Prints "done!" without echoing the command itself.
beep:
@echo done!
# Delete the object files and the executable.
clean:
rm -f $(OBJS) lucy
This diff is collapsed.
This diff is collapsed.
/****************************************************************************
*
* Copyright (c) 2003, The Institute for Genomic Research (TIGR), Rockville,
* Maryland, U.S.A. All rights reserved.
*
****************************************************************************/
/* Base codes that poly_at_right() and poly_at_left() compare against the
   value returned by abi_code(). */
#define A 0
#define T 3
/* Parameters read (never written) by poly_at_left()/poly_at_right():
     cdna - number of consecutive T's (or A's) that counts as a poly run
     cerr - number of non-matching bases tolerated while extending a run
     crng - how many bases from the sequence end are searched for a run
     keep - when non-zero, report where the run starts instead of extending it
   No assignment to them is visible here; presumably set by the caller. */
int cdna, cerr, crng, keep;
int poly_at_left(seq, len)
char *seq;
int len;
{
register int i, err, ttt, pos;
/* find the first 'cdna' number of connected T's in the first
'crng' bases of the input sequence */
for (i=pos=ttt=0; i<crng && i<len; i++, seq++)
if (abi_code(*seq)==T) {
ttt++;
if (ttt>=cdna)
break;
} else
ttt=0;
if (i>=crng || i>=len) /* found nothing within 'crng', return nil */
return pos;
if (keep) /* keep the poly-T tag for identification purpose */
return i-ttt+1;
/* extend span of poly-T within 'cerr' error tolerance */
for (i++, seq++, err=0; i<len; i++, seq++)
if (abi_code(*seq)==T) {
ttt++;
if (ttt>=cdna)
err=0;
} else {
if (err<=0)
pos=i;
err++; ttt=0;
if (err>cerr)
return pos;
}
return len;
}
int poly_at_right(seq, len)
char *seq;
int len;
{
register int i, err, aaa, pos;
/* find the last 'cdna' number of connected A's in the last
'crng' bases of the input sequence */
for (i=pos=aaa=0; i<crng && i<len; i++, seq--)
if (abi_code(*seq)==A) {
aaa++;
if (aaa>=cdna)
break;
} else
aaa=0;
if (i>=crng || i>=len) /* found nothing within 'crng', return nil */
return pos;
if (keep) /* keep the poly-A tag for identification purpose */
return i-aaa+1;
/* extend span of poly-A within 'cerr' error tolerance */
for (i++, seq--, err=0; i<len; i++, seq--)
if (abi_code(*seq)==A) {
aaa++;
if (aaa>=cdna)
err=0;
} else {
if (err<=0)
pos=i;
err++; aaa=0;
if (err>cerr)
return pos;
}
return len;
}
This diff is collapsed.
/****************************************************************************
*
* Copyright (c) 2003, The Institute for Genomic Research (TIGR), Rockville,
* Maryland, U.S.A. All rights reserved.
*
****************************************************************************/
/* Back-pointer codes stored per cell in the direction matrix: NIL marks the
   start of an alignment path, LFT/DIA/TOP name the neighbouring cell the
   score was extended from. */
#define NIL 0
#define LFT 1
#define DIA 2
#define TOP 3
/* Row-major access to cell (x, y) of the score and direction matrices.
   Both macros pick up a variable named 'width' from the scope in which they
   are used, so they only work where such a variable is visible. */
#define Score(x, y) splice_score_base[(y)*width+(x)]
#define Direc(x, y) splice_direc_base[(y)*width+(x)]
/* Work buffers used by the alignment routines below.  They are only
   declared here; no allocation is visible in this file section.
   splice_match_base is indexed by a position in the target sequence and
   yields a minimum hit length; the other two back the Score()/Direc()
   matrices and must hold at least width*height cells. */
int *splice_match_base;
int *splice_score_base;
char *splice_direc_base;
/* Pairwise substitution scores, looked up as match[code_a][code_b].
   Rows and columns follow the order given in the header comments
   (A C G T U R Y M W S K D H V B N = 0..15).  Identical unambiguous bases
   (and T/U) score 5, incompatible pairs score -14, and pairs involving
   ambiguity codes score 2, 1 or 0; anything paired with N scores 0. */
static int match[16][16]={
/* A C G T U R Y M W S K D H V B N */
/*A*/ { 5,-14,-14,-14,-14, 2,-14, 2, 2,-14,-14, 1, 1, 1,-14, 0,},
/*C*/ {-14, 5,-14,-14,-14,-14, 2, 2,-14, 2,-14,-14, 1, 1, 1, 0,},
/*G*/ {-14,-14, 5,-14,-14, 2,-14,-14,-14, 2, 2, 1,-14, 1, 1, 0,},
/*T*/ {-14,-14,-14, 5, 5,-14, 2,-14, 2,-14, 2, 1, 1,-14, 1, 0,},
/*U*/ {-14,-14,-14, 5, 5,-14, 2,-14, 2,-14, 2, 1, 1,-14, 1, 0,},
/*R*/ { 2,-14, 2,-14,-14, 2,-14, 1, 1, 1, 1, 1, 0, 1, 0, 0,},
/*Y*/ {-14, 2,-14, 2, 2,-14, 2, 1, 1, 1, 1, 0, 1, 0, 1, 0,},
/*M*/ { 2, 2,-14,-14,-14, 1, 1, 2, 1, 1,-14, 0, 1, 1, 0, 0,},
/*W*/ { 2,-14,-14, 2, 2, 1, 1, 1, 2,-14, 1, 1, 1, 0, 0, 0,},
/*S*/ {-14, 2, 2,-14,-14, 1, 1, 1,-14, 2, 1, 0, 0, 1, 1, 0,},
/*K*/ {-14,-14, 2, 2, 2, 1, 1,-14, 1, 1, 2, 1, 0, 0, 1, 0,},
/*D*/ { 1,-14, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0,},
/*H*/ { 1, 1,-14, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,},
/*V*/ { 1, 1, 1,-14,-14, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,},
/*B*/ {-14, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,},
/*N*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
/* A C G T U R Y M W S K D H V B N */
};
/* Gap costs: opening a gap costs GAPOPEN, lengthening one costs GAPGROW. */
#define GAPOPEN -17
#define GAPGROW -5
/* Cost of stepping from the left (resp. top) neighbour, indexed by that
   neighbour's own direction code (NIL, LFT, DIA, TOP).  Only a neighbour
   that was itself reached by the same kind of step gets the cheaper
   GAPGROW; every other case pays GAPOPEN. */
static int left_penalty[]={ GAPOPEN, GAPGROW, GAPOPEN, GAPOPEN };
static int top_penalty[]={ GAPOPEN, GAPOPEN, GAPOPEN, GAPGROW };
/*
 * splice_align_left - repeatedly align the target sequence 'a' against the
 * splice sequence 'b' and report how far the matched region reaches into 'a'.
 *
 *   a, width    target sequence and its length
 *   b, height   splice sequence and its length
 *   offset      added to column indices of 'a' to form the positions that
 *               are reported and used to index splice_match_base
 *   good        position used in the "hit too short" test below
 *   right_end   out: position (offset-adjusted) of the last base of the
 *               most recent accepted hit, or -1 if none was accepted
 *
 * The elements of 'a' and 'b' are used directly as indices into match[], so
 * they are assumed to be base codes in 0..15 -- TODO confirm with callers.
 * The Score()/Direc() matrices need room for width*height cells.
 *
 * Each round fills a local-alignment matrix (a diagonal step restarts from
 * the bare match score when the previous diagonal score is negative), takes
 * the best-scoring cell, backtracks to the start of its path and then
 * decides whether to stop or to chop off everything up to the hit and try
 * again on the remainder.
 */
void splice_align_left(char *a, int width, char *b, int height,
                       int offset, int good, int *right_end)
{
    int left, top, direc, score, i, j;
    int max_score, max_i, max_j;

    *right_end = -1;
    if (width < 2 || height < 2)
        return;

    while (1) { /* keep trimming until no more vector found */
        /* setting the upper-left-most cell */
        Score(0,0) = max_score = match[*a][*b];
        Direc(0,0) = NIL;
        max_i = max_j = 0;

        /* setting the first row: only "fresh start" or "from the left" */
        for (i = 1; i < width; i++) {
            direc = NIL;
            score = match[a[i]][*b];
            left = Score(i-1,0) + left_penalty[Direc(i-1,0)];
            if (score < left) {
                direc = LFT; score = left;
            }
            Direc(i,0) = direc;
            Score(i,0) = score;
            if (score > max_score) {
                max_score = score; max_i = i; max_j = 0;
            }
        }

        /* setting the first column: only "fresh start" or "from the top" */
        for (j = 1; j < height; j++) {
            direc = NIL;
            score = match[*a][b[j]];
            top = Score(0,j-1) + top_penalty[Direc(0,j-1)];
            if (score < top) {
                direc = TOP; score = top;
            }
            Direc(0,j) = direc;
            Score(0,j) = score;
            if (score > max_score) {
                max_score = score; max_i = 0; max_j = j;
            }
        }

        /* running dynamic programming for internal nodes */
        for (j = 1; j < height; j++)
            for (i = 1; i < width; i++) {
                /* diagonal candidate: extend only a non-negative path */
                score = Score(i-1,j-1);
                if (score >= 0) {
                    score += match[a[i]][b[j]];
                    direc = DIA;
                } else {
                    score = match[a[i]][b[j]];
                    direc = NIL;
                }
                top = Score(i,j-1) + top_penalty[Direc(i,j-1)];
                left = Score(i-1,j) + left_penalty[Direc(i-1,j)];
                if (top > left) {
                    if (score < top) {
                        direc = TOP; score = top;
                    }
                } else if (left > top) {
                    if (score < left) {
                        direc = LFT; score = left;
                    }
                } else if (score < top) {
                    /* top == left: pick by which side of the matrix
                       diagonal the cell lies on */
                    if (i*height >= j*width) {
                        direc = LFT; score = left;
                    } else {
                        direc = TOP; score = top;
                    }
                }
                Direc(i,j) = direc;
                Score(i,j) = score;
                /* ">=" here (unlike the border loops): a later cell with an
                   equal score replaces the earlier one */
                if (score >= max_score) {
                    max_score = score; max_i = i; max_j = j;
                }
            }

        /* backtrack to find the longest alignment path */
        i = max_i; j = max_j;
        while ((direc = Direc(i,j)) != NIL)
            switch (direc) {
            case LFT:
                i--; break;
            case DIA:
                i--; j--; break;
            case TOP:
                j--; break;
            }

        /* check if done or continue chopping */
        j = offset + max_i;         /* reported position of the hit's end */
        left = max_i - i + 1;       /* number of columns of 'a' in the hit */
        if (left < splice_match_base[j]) {
            /* hit shorter than the minimum for this position; it is still
               reported when the position itself is below that minimum */
            if (j < splice_match_base[j])
                *right_end = j;
            return;
        } else if (left < i + offset - good && left < width/2)
            return;
        else {
            /* accept the hit, drop everything up to and including its end
               from both sequences, and align the remainder again */
            *right_end = j;
            max_i++; max_j++;
            offset += max_i;
            a += max_i;
            width -= max_i;
            b += max_j;
            height -= max_j;
            if (width < splice_match_base[j] || height < splice_match_base[j])
                return;
        }
    }
}
/*
 * splice_align_right - align the target sequence 'a' against the splice
 * sequence 'b' once and report where a sufficiently long hit starts in 'a'.
 *
 *   a, width    target sequence and its length
 *   b, height   splice sequence and its length
 *   offset      added to the hit's start column to form the reported position
 *   span        minimum length of a good hit; also the minimum width and
 *               height for which any work is done
 *   left_end    out: offset-adjusted start position of the hit, or -1 when
 *               no acceptable hit was found
 *
 * The elements of 'a' and 'b' are used directly as indices into match[], so
 * they are assumed to be base codes in 0..15 -- TODO confirm with callers.
 * The Score()/Direc() matrices need room for width*height cells.
 */
void splice_align_right(char *a, int width, char *b, int height,
                        int offset, int span, int *left_end)
{
    int left, top, direc, score, i, j;
    int max_score, max_i, max_j;

    *left_end = -1;
    if (width < span || height < span)
        return;

    /* setting the upper-left-most cell */
    Score(0,0) = max_score = match[*a][*b];
    Direc(0,0) = NIL;
    max_i = max_j = 0;

    /* setting the first row: only "fresh start" or "from the left" */
    for (i = 1; i < width; i++) {
        direc = NIL;
        score = match[a[i]][*b];
        left = Score(i-1,0) + left_penalty[Direc(i-1,0)];
        if (score < left) {
            direc = LFT; score = left;
        }
        Direc(i,0) = direc;
        Score(i,0) = score;
        if (score > max_score) {
            max_score = score; max_i = i; max_j = 0;
        }
    }

    /* setting the first column: only "fresh start" or "from the top" */
    for (j = 1; j < height; j++) {
        direc = NIL;
        score = match[*a][b[j]];
        top = Score(0,j-1) + top_penalty[Direc(0,j-1)];
        if (score < top) {
            direc = TOP; score = top;
        }
        Direc(0,j) = direc;
        Score(0,j) = score;
        if (score > max_score) {
            max_score = score; max_i = 0; max_j = j;
        }
    }

    /* running dynamic programming for internal nodes */
    for (j = 1; j < height; j++)
        for (i = 1; i < width; i++) {
            /* diagonal candidate: extend only a non-negative path */
            score = Score(i-1,j-1);
            if (score >= 0) {
                score += match[a[i]][b[j]];
                direc = DIA;
            } else {
                score = match[a[i]][b[j]];
                direc = NIL;
            }
            top = Score(i,j-1) + top_penalty[Direc(i,j-1)];
            left = Score(i-1,j) + left_penalty[Direc(i-1,j)];
            if (top > left) {
                if (score < top) {
                    direc = TOP; score = top;
                }
            } else if (left > top) {
                if (score < left) {
                    direc = LFT; score = left;
                }
            } else if (score < top) {
                /* top == left: pick by which side of the matrix diagonal
                   the cell lies on */
                if (i*height >= j*width) {
                    direc = LFT; score = left;
                } else {
                    direc = TOP; score = top;
                }
            }
            Direc(i,j) = direc;
            Score(i,j) = score;
            /* ">=" here (unlike the border loops): a later cell with an
               equal score replaces the earlier one */
            if (score >= max_score) {
                max_score = score; max_i = i; max_j = j;
            }
        }

    /* backtrack to find the longest alignment path */
    i = max_i; j = max_j;
    while ((direc = Direc(i,j)) != NIL)
        switch (direc) {
        case LFT:
            i--; break;
        case DIA:
            i--; j--; break;
        case TOP:
            j--; break;
        }

    j = max_i - i + 1;  /* number of columns of 'a' covered by the hit */

    /* The length of a good hit must meet one of these 2 criteria:      */
    /* at least as long as the minimum good hit (span), and at least    */
    /* as long as the remaining target sequence that follows the hit;   */
    /* or longer than half the size of the splice sequence.             */
    if ((j >= span && j >= width - max_i) || j > height/2)
        *left_end = i + offset;
}
/****************************************************************************
*
* Copyright (c) 2003, The Institute for Genomic Research (TIGR), Rockville,
* Maryland, U.S.A. All rights reserved.
*
****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
extern void giveup(char *);
/* Dlist: the three codes tag_code() chooses among at random for input
   code 11.  Hlist has the same shape; its use is not visible in this part
   of the file -- presumably the analogous list for code 12, TODO confirm.
   rev_code: 16-entry code-to-code table.  NOTE(review): under the
   A C G T U R Y M W S K D H V B N ordering it reads as the complement of
   each code -- confirm against its users. */
static int Dlist[]={ 0, 2, 3}, Hlist[]={ 0, 1, 3},
rev_code[]={ 3, 2, 1, 0, 0, 6, 5, 10, 8, 9, 7, 12, 11, 14, 13, 15};
/* Tag-related state; tag_size is the only one with external linkage.
   Their uses are not visible in this part of the file. */
int tag_size;
static int tag_len;
static unsigned tag_mask;
static unsigned *tag_well;
int tag_code(c)
int c;
{
register int tmp;
switch (c) {
case 0: tmp=0; break;
case 1: tmp=1; break;
case 2: tmp=2; break;
case 3:
case 4: tmp=3; break;
case 5: tmp=random()%2 ? 2 : 0; break;
case 6: tmp=random()%2 ? 3 : 1; break;
case 7: tmp=random()%2 ; break;
case 8: tmp=random()%2 ? 3 : 0; break;
case 9: tmp=random()%2 ? 2 : 1; break;
case 10: tmp=random()%2 ? 3 : 2; break;
case 11: tmp=Dlist[random()%3]; break;