Commit e0bade90 authored by Andreas Tille's avatar Andreas Tille

New upstream version 3.8.1551

parent 0d81e907
# Porting notes:
# For Solaris and other platforms where the logf function
# is missing from the math library, add the following line
# to the end of muscle.h:
# #define logf(x) ((float) log(x))
# Using -static increases the executable size and thus gives a very
# small increase in start time, but is more portable (the binding
# to dynamic libraries often breaks when a new library is released).
# On OSX, using -static gives the error "ld: can't locate file for: -lcrt0.o",
# this is fixed by deleting "-static" from the LDLIBS line.
CFLAGS = -O3 -funroll-loops -Winline -DNDEBUG=1
LDLIBS = -lm -static
# LDLIBS = -lm
OBJ = .o
EXE =
RM = rm -f
CP = cp
GPP = g++
LD = $(GPP) $(CFLAGS)
CPP = $(GPP) -c $(CFLAGS)
all: muscle
CPPSRC = $(sort $(wildcard *.cpp))
CPPOBJ = $(subst .cpp,.o,$(CPPSRC))
$(CPPOBJ): %.o: %.cpp
$(CPP) $< -o $@
muscle: $(CPPOBJ)
$(LD) -o muscle $(CPPOBJ) $(LDLIBS)
strip muscle
......@@ -170,7 +170,7 @@ unsigned DiagBreak(const Diag &d1, const Diag &d2)
}
// Merge diagonals that are continuations of each other with
// short breaks of up to length g_uMaxDiagBreak.
// int breaks of up to length g_uMaxDiagBreak.
// In a sorted list of diagonals, we only have to check
// consecutive entries.
void MergeDiags(DiagList &DL)
......
......@@ -79,19 +79,22 @@ void DoMuscle()
if (0 != UserMatrix)
g_ptrScoreMatrix = UserMatrix;
unsigned uMinL = 0;
unsigned uMaxL = 0;
unsigned uTotL = 0;
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
{
unsigned L = v.GetSeq(uSeqIndex).Length();
uTotL += L;
if (uMinL == 0 || L < uMinL)
uMinL = L;
if (L > uMaxL)
uMaxL = L;
}
SetIter(1);
g_bDiags = g_bDiags1;
SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
SetSeqStats(uSeqCount, uMinL, uMaxL, uTotL/uSeqCount);
SetMuscleSeqVect(v);
......
......@@ -20,7 +20,7 @@ represented as two estrings, one for each sequence.
#define c2(c,d) (((unsigned char) c) << 8 | (unsigned char) d)
unsigned LengthEstring(const short es[])
unsigned LengthEstring(const int es[])
{
unsigned i = 0;
while (*es++ != 0)
......@@ -28,15 +28,15 @@ unsigned LengthEstring(const short es[])
return i;
}
short *EstringNewCopy(const short es[])
int *EstringNewCopy(const int es[])
{
unsigned n = LengthEstring(es) + 1;
short *esNew = new short[n];
memcpy(esNew, es, n*sizeof(short));
int *esNew = new int[n];
memcpy(esNew, es, n*sizeof(int));
return esNew;
}
void LogEstring(const short es[])
void LogEstring(const int es[])
{
Log("<");
for (unsigned i = 0; es[i] != 0; ++i)
......@@ -48,7 +48,7 @@ void LogEstring(const short es[])
Log(">");
}
static bool EstringsEq(const short es1[], const short es2[])
static bool EstringsEq(const int es1[], const int es2[])
{
for (;;)
{
......@@ -62,14 +62,14 @@ static bool EstringsEq(const short es1[], const short es2[])
return true;
}
static void EstringCounts(const short es[], unsigned *ptruSymbols,
static void EstringCounts(const int es[], unsigned *ptruSymbols,
unsigned *ptruIndels)
{
unsigned uSymbols = 0;
unsigned uIndels = 0;
for (unsigned i = 0; es[i] != 0; ++i)
{
short n = es[i];
int n = es[i];
if (n > 0)
uSymbols += n;
else if (n < 0)
......@@ -79,7 +79,7 @@ static void EstringCounts(const short es[], unsigned *ptruSymbols,
*ptruIndels = uIndels;
}
static char *EstringOp(const short es[], const char s[])
static char *EstringOp(const int es[], const char s[])
{
unsigned uSymbols;
unsigned uIndels;
......@@ -104,7 +104,7 @@ static char *EstringOp(const short es[], const char s[])
return sout;
}
void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
void EstringOp(const int es[], const Seq &sIn, Seq &sOut)
{
#if DEBUG
unsigned uSymbols;
......@@ -132,7 +132,7 @@ void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
}
}
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
unsigned EstringOp(const int es[], const Seq &sIn, MSA &a)
{
unsigned uSymbols;
unsigned uIndels;
......@@ -168,14 +168,14 @@ unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
return uColCount;
}
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
void PathToEstrings(const PWPath &Path, int **ptresA, int **ptresB)
{
// First pass to determine size of estrings esA and esB
const unsigned uEdgeCount = Path.GetEdgeCount();
if (0 == uEdgeCount)
{
short *esA = new short[1];
short *esB = new short[1];
int *esA = new int[1];
int *esB = new int[1];
esA[0] = 0;
esB[0] = 0;
*ptresA = esA;
......@@ -223,7 +223,7 @@ void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
// Pass2 for seq A
{
short *esA = new short[iLengthA+1];
int *esA = new int[iLengthA+1];
unsigned iA = 0;
switch (Path.GetEdge(0).cType)
{
......@@ -284,7 +284,7 @@ void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
{
// Pass2 for seq B
short *esB = new short[iLengthB+1];
int *esB = new int[iLengthB+1];
unsigned iB = 0;
switch (Path.GetEdge(0).cType)
{
......@@ -363,7 +363,7 @@ void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB)
#endif
}
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
void EstringsToPath(const int esA[], const int esB[], PWPath &Path)
{
Path.Clear();
unsigned iA = 0;
......@@ -461,7 +461,7 @@ Therefore,
<-1,3>*<2,-1,2> = <-1,1,-1,2>
***/
static bool CanMultiplyEstrings(const short es1[], const short es2[])
static bool CanMultiplyEstrings(const int es1[], const int es2[])
{
unsigned uSymbols1;
unsigned uSymbols2;
......@@ -472,8 +472,9 @@ static bool CanMultiplyEstrings(const short es1[], const short es2[])
return uSymbols1 + uIndels1 == uSymbols2;
}
static inline void AppendGaps(short esp[], int &ip, int n)
static inline void AppendGaps(int esp[], int &ip, int n)
{
assert(n < SHRT_MAX);
if (-1 == ip)
esp[++ip] = n;
else if (esp[ip] < 0)
......@@ -482,8 +483,9 @@ static inline void AppendGaps(short esp[], int &ip, int n)
esp[++ip] = n;
}
static inline void AppendSymbols(short esp[], int &ip, int n)
static inline void AppendSymbols(int esp[], int &ip, int n)
{
assert(n < SHRT_MAX);
if (-1 == ip)
esp[++ip] = n;
else if (esp[ip] > 0)
......@@ -492,10 +494,8 @@ static inline void AppendSymbols(short esp[], int &ip, int n)
esp[++ip] = n;
}
void MulEstrings(const short es1[], const short es2[], short esp[])
void MulEstrings(const int es1[], const int es2[], int esp[])
{
assert(CanMultiplyEstrings(es1, es2));
unsigned i1 = 0;
int ip = -1;
int n1 = es1[i1++];
......@@ -597,7 +597,7 @@ void MulEstrings(const short es1[], const short es2[], short esp[])
#endif
}
static void test(const short es1[], const short es2[], const short esa[])
static void test(const int es1[], const int es2[], const int esa[])
{
unsigned uSymbols1;
unsigned uSymbols2;
......@@ -626,7 +626,7 @@ static void test(const short es1[], const short es2[], const short esa[])
LogEstring(esa);
Log("\n");
short esp[4096];
int esp[4096];
MulEstrings(es1, es2, esp);
LogEstring(esp);
if (!EstringsEq(esp, esa))
......@@ -644,45 +644,45 @@ void TestEstrings()
{
SetListFileName("c:\\tmp\\muscle.log", false);
//{
//short es1[] = { -1, 1, -1, 0 };
//short es2[] = { 1, -1, 2, 0 };
//short esa[] = { -2, 1, -1, 0 };
//int es1[] = { -1, 1, -1, 0 };
//int es2[] = { 1, -1, 2, 0 };
//int esa[] = { -2, 1, -1, 0 };
//test(es1, es2, esa);
//}
//{
//short es1[] = { 2, -1, 2, 0 };
//short es2[] = { 1, -1, 3, -1, 1, 0 };
//short esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
//int es1[] = { 2, -1, 2, 0 };
//int es2[] = { 1, -1, 3, -1, 1, 0 };
//int esa[] = { 1, -1, 1, -1, 1, -1, 1, 0 };
//test(es1, es2, esa);
//}
//{
//short es1[] = { -1, 3, 0 };
//short es2[] = { 2, -1, 2, 0 };
//short esa[] = { -1, 1, -1, 2, 0 };
//int es1[] = { -1, 3, 0 };
//int es2[] = { 2, -1, 2, 0 };
//int esa[] = { -1, 1, -1, 2, 0 };
//test(es1, es2, esa);
//}
//{
//short es1[] = { -1, 1, -1, 1, 0};
//short es2[] = { 4, 0 };
//short esa[] = { -1, 1, -1, 1, 0};
//int es1[] = { -1, 1, -1, 1, 0};
//int es2[] = { 4, 0 };
//int esa[] = { -1, 1, -1, 1, 0};
//test(es1, es2, esa);
//}
//{
//short es1[] = { 1, -1, 1, -1, 0};
//short es2[] = { 4, 0 };
//short esa[] = { 1, -1, 1, -1, 0};
//int es1[] = { 1, -1, 1, -1, 0};
//int es2[] = { 4, 0 };
//int esa[] = { 1, -1, 1, -1, 0};
//test(es1, es2, esa);
//}
//{
//short es1[] = { 1, -1, 1, -1, 0};
//short es2[] = { -1, 4, -1, 0 };
//short esa[] = { -1, 1, -1, 1, -2, 0};
//int es1[] = { 1, -1, 1, -1, 0};
//int es2[] = { -1, 4, -1, 0 };
//int esa[] = { -1, 1, -1, 1, -2, 0};
//test(es1, es2, esa);
//}
{
short es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
short es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
short esa[] = { 0 };
int es1[] = { 106, -77, 56, -2, 155, -3, 123, -2, 0};
int es2[] = { 50, -36, 34, -3, 12, -6, 1, -6, 18, -17, 60, -5, 349, -56, 0 };
int esa[] = { 0 };
test(es1, es2, esa);
}
exit(0);
......
#ifndef pathsum_h
#define pathsum_h
void PathToEstrings(const PWPath &Path, int **ptresA, int **ptresB);
void EstringsToPath(const int esA[], const int esB[], PWPath &Path);
void MulEstrings(const int es1[], const int es2[], int esp[]);
void EstringOp(const int es[], const Seq &sIn, Seq &sOut);
unsigned EstringOp(const int es[], const Seq &sIn, MSA &a);
void LogEstring(const int es[]);
unsigned LengthEstring(const int es[]);
int *EstringNewCopy(const int es[]);
#endif // pathsum_h
......@@ -8,7 +8,7 @@ const unsigned TRIPLE_COUNT = 20*20*20;
struct TripleCount
{
unsigned m_uSeqCount; // How many sequences have this triple?
unsigned short *m_Counts; // m_Counts[s] = nr of times triple found in seq s
unsigned int *m_Counts; // m_Counts[s] = nr of times triple found in seq s
};
static TripleCount *TripleCounts;
......@@ -36,8 +36,8 @@ void DistKmer20_3(const SeqVect &v, DistFunc &DF)
for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
{
TripleCount &tc = *(TripleCounts + uWord);
const unsigned uBytes = uSeqCount*sizeof(short);
tc.m_Counts = (unsigned short *) malloc(uBytes);
const unsigned uBytes = uSeqCount*sizeof(int);
tc.m_Counts = (unsigned int *) malloc(uBytes);
memset(tc.m_Counts, 0, uBytes);
}
......@@ -118,7 +118,7 @@ void DistKmer20_3(const SeqVect &v, DistFunc &DF)
#endif
const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);
unsigned short *SeqList = (unsigned short *) malloc(uSeqListBytes);
unsigned int *SeqList = (unsigned int *) malloc(uSeqListBytes);
for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
{
......
......@@ -88,7 +88,7 @@ void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
// Build tuple map for the longer profile, B
if (uLengthB < KTUP)
Quit("FindDiags: profile too short");
Quit("FindDiags: profile too int");
memset(TuplePos, EMPTY, sizeof(TuplePos));
......
......@@ -79,7 +79,7 @@ void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
// Build tuple map for the longer profile, B
if (uLengthB < K)
Quit("FindDiags: profile too short");
Quit("FindDiags: profile too int");
memset(TuplePos, EMPTY, sizeof(TuplePos));
......
......@@ -13,8 +13,8 @@
#include <netinet/icmp6.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
#include <mach/task_info.h>
#include <mach/task.h>
#include <mach/task_info.h>
#include <mach/mach_init.h>
#include <mach/vm_statistics.h>
......
......@@ -11,8 +11,8 @@
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
{
short *esA;
short *esB;
int *esA;
int *esB;
PathToEstrings(Path, &esA, &esB);
const unsigned uSeqLength = s.Length();
......@@ -73,11 +73,11 @@ static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeI
#endif // VALIDATE
static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2)
static int *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex,
const ProgNode Nodes[], Seq &sRoot, int *Estring1, int *Estring2)
{
short *EstringCurr = Estring1;
short *EstringNext = Estring2;
int *EstringCurr = Estring1;
int *EstringNext = Estring2;
const unsigned uSeqLength = s.Length();
EstringCurr[0] = uSeqLength;
......@@ -92,7 +92,7 @@ static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNo
bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex);
uNodeIndex = uParent;
const PWPath &Path = Nodes[uNodeIndex].m_Path;
const short *EstringNode = bRight ?
const int *EstringNode = bRight ?
Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR;
MulEstrings(EstringCurr, EstringNode, EstringNext);
......@@ -108,7 +108,7 @@ static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNo
LogEstring(EstringNext);
Log("\n");
#endif
short *EstringTmp = EstringNext;
int *EstringTmp = EstringNext;
EstringNext = EstringCurr;
EstringCurr = EstringTmp;
}
......@@ -170,8 +170,8 @@ void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
const unsigned uRootColCount = RootPath.GetEdgeCount();
const unsigned uEstringSize = uRootColCount + 1;
short *Estring1 = new short[uEstringSize];
short *Estring2 = new short[uEstringSize];
int *Estring1 = new int[uEstringSize];
int *Estring2 = new int[uEstringSize];
SetProgressDesc("Root alignment");
unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
......@@ -183,7 +183,7 @@ void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
const Seq &s = *(v[uId]);
Seq sRootE;
short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
int *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
Estring1, Estring2);
Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);
......
......@@ -68,7 +68,7 @@ void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const
else
File.PutString("\n");
char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'A';
char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'P';
File.PutFormat(" MSF: %u Type: %c Check: 0000 ..\n\n",
GetColCount(), seqtype);
......
......@@ -43,19 +43,22 @@ void MUSCLE(SeqVect &v, MSA &msaOut)
g_Distance1 = DISTANCE_Kmer4_6;
}
unsigned uMinL = 0;
unsigned uMaxL = 0;
unsigned uTotL = 0;
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
{
unsigned L = v.GetSeq(uSeqIndex).Length();
uTotL += L;
if (uMinL == 0 || L < uMinL)
uMinL = L;
if (L > uMaxL)
uMaxL = L;
}
SetIter(1);
g_bDiags = g_bDiags1;
SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);
SetSeqStats(uSeqCount, uMinL, uMaxL, uTotL/uSeqCount);
MSA::SetIdCount(uSeqCount);
......
......@@ -241,7 +241,7 @@ void Progress(const char *szFormat, ...);
void SetStartTime();
void ProgressStepsDone();
void SetProgressDesc(const char szDesc[]);
void SetSeqStats(unsigned uSeqCount, unsigned uMaxL, unsigned uAvgL);
void SetSeqStats(unsigned uSeqCount, unsigned uMinL, unsigned uMaxL, unsigned uAvgL);
void SetNewHandler();
void SaveCurrentAlignment();
......