Commit bacfc626 authored by Andreas Tille's avatar Andreas Tille

Imported Upstream version 1.2.2

parent 650b2947
......@@ -31,7 +31,7 @@ PROJECT_NAME = "Clustal Omega"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.2.1
PROJECT_NUMBER = 1.2.2
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
......
......@@ -282,6 +282,70 @@ not `/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
On Windows do
1. Preparation
1.1. Install free MinGW64 on Windows 7. Download
mingw-w64-bin_x86_64-mingw_20111101_sezero.zip from
http://sourceforge.net/projects/mingw-w64/files/Toolchains
targetting Win64/Personal Builds/sezero_4.5_20111101/, extract
it, move mingw64 folder to C:\ and rename it to mingw. MinGW64
provides tools to develop 64-bit Windows applications using
gcc and g++. With MinGW64, some software developed for the Linux
platform can be built on Windows.
1.2. There is a file named pthreads-w64.zip in C:\mingw
folder. Extract it under C:\mingw folder.
1.3. Download MSYS-1.0.11.exe from
http://sourceforge.net/projects/mingw/files/MSYS/Base/msys-core/msys-1.0.11/
and install it.
1.4. Download Clustal Omega source from
http://www.clustal.org/omega/clustal-omega-x.x.x.tar.gz (where
x.x.x is the current version)
1.5. Copy downloaded file to MSYS. If you installed MSYS in
C:\msys and your account is Administrator, copy it to
C:\msys\1.0\home\Administrator. You can do it using Windows
Explorer.
1.6. Download argtable2-13.tar.gz from
http://argtable.sourceforge.net/ and copy it to MSYS. This is
required by Clustal Omega.
2. Configuration and Build process
2.1. Launch MSYS and extract argtable2 source as tar xfz
argtable2-13.tar.gz.
2.2. Extract Clustal Omega source as tar xfz
clustal-omega-x.x.x.tar.gz (x.x.x is the version downloaded in step 1.4).
2.3. cd argtable2-13; ./configure; make; make install
2.4. cd ~/clustal-omega-x.x.x
2.5. ./configure CFLAGS='-I/usr/local/include
-DSRE_STRICT_ANSI' LDFLAGS='-L/usr/local/lib'
2.6. make; make install
2.7. You can find clustalo.exe in the /usr/local/bin folder, which
is C:\msys\1.0\local\bin.
2.8. The following DLLs are necessary to run clustalo.exe. Put
them in the same folder where clustalo.exe
exists. C:\mingw\bin\libgcc_s_sjlj-1.dll,
C:\mingw\bin\libgomp-1.dll, C:\mingw\bin\libstdc++-6.dll,
C:\mingw\bin\pthreadGC2-w64.dll
(lifted from
http://www.blaststation.com/freestuff/en/howtoBuildx64ClustalO.php,
as of 2014-10-14)
Specifying the System Type
==========================
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.63 for Clustal Omega 1.2.1.
# Generated by GNU Autoconf 2.63 for Clustal Omega 1.2.2.
#
# Report bugs to <clustalw@ucd.ie>.
#
......@@ -745,8 +745,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='Clustal Omega'
PACKAGE_TARNAME='clustal-omega'
PACKAGE_VERSION='1.2.1'
PACKAGE_STRING='Clustal Omega 1.2.1'
PACKAGE_VERSION='1.2.2'
PACKAGE_STRING='Clustal Omega 1.2.2'
PACKAGE_BUGREPORT='clustalw@ucd.ie'
# Factoring default headers for most tests.
......@@ -1483,7 +1483,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures Clustal Omega 1.2.1 to adapt to many kinds of systems.
\`configure' configures Clustal Omega 1.2.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1553,7 +1553,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of Clustal Omega 1.2.1:";;
short | recursive ) echo "Configuration of Clustal Omega 1.2.2:";;
esac
cat <<\_ACEOF
......@@ -1659,7 +1659,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
Clustal Omega configure 1.2.1
Clustal Omega configure 1.2.2
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
......@@ -1673,7 +1673,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by Clustal Omega $as_me 1.2.1, which was
It was created by Clustal Omega $as_me 1.2.2, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
......@@ -2492,7 +2492,7 @@ fi
# Define the identity of the package.
PACKAGE='clustal-omega'
VERSION='1.2.1'
VERSION='1.2.2'
cat >>confdefs.h <<_ACEOF
......@@ -21790,7 +21790,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Clustal Omega $as_me 1.2.1, which was
This file was extended by Clustal Omega $as_me 1.2.2, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -21853,7 +21853,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
Clustal Omega config.status 1.2.1
Clustal Omega config.status 1.2.2
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
......
# configure.ac for Clustal Omega
#
# RCS $Id: configure.ac 292 2014-02-28 14:26:55Z fabian $
# RCS $Id: configure.ac 310 2016-06-13 14:11:35Z fabian $
# release
......@@ -26,7 +26,9 @@
#PACKAGE_CODENAME="FilumVitae"
#AC_INIT([Clustal Omega], [1.2.0], [clustalw@ucd.ie])
#PACKAGE_CODENAME="AndreaGiacomo"
AC_INIT([Clustal Omega], [1.2.1], [clustalw@ucd.ie])
#AC_INIT([Clustal Omega], [1.2.1], [clustalw@ucd.ie])
#PACKAGE_CODENAME="AndreaGiacomo"
AC_INIT([Clustal Omega], [1.2.2], [clustalw@ucd.ie])
PACKAGE_CODENAME="AndreaGiacomo"
# The AC_INIT macro can take any source file as an argument. It just
......
......@@ -199,7 +199,9 @@
/* #undef MINGW */
/* No-debug Mode */
/* #undef NDEBUG */
#ifndef CLUSTAL_OMEGA_NDEBUG
#define CLUSTAL_OMEGA_NDEBUG /**/
#endif
/* Some strange OS */
/* #undef OTHEROS */
......@@ -226,7 +228,7 @@
/* Define to the full name and version of this package. */
#ifndef CLUSTAL_OMEGA_PACKAGE_STRING
#define CLUSTAL_OMEGA_PACKAGE_STRING "Clustal Omega 1.2.1"
#define CLUSTAL_OMEGA_PACKAGE_STRING "Clustal Omega 1.2.2"
#endif
/* Define to the one symbol short name of this package. */
......@@ -236,7 +238,7 @@
/* Define to the version of this package. */
#ifndef CLUSTAL_OMEGA_PACKAGE_VERSION
#define CLUSTAL_OMEGA_PACKAGE_VERSION "1.2.1"
#define CLUSTAL_OMEGA_PACKAGE_VERSION "1.2.2"
#endif
/* The size of `fpos_t', as computed by sizeof. */
......@@ -299,7 +301,7 @@
/* Version number of package */
#ifndef CLUSTAL_OMEGA_VERSION
#define CLUSTAL_OMEGA_VERSION "1.2.1"
#define CLUSTAL_OMEGA_VERSION "1.2.2"
#endif
/* Define if using the dmalloc debugging malloc package */
......
......@@ -15,7 +15,7 @@
********************************************************************/
/*
* RCS $Id: clustal-omega.c 290 2013-09-20 15:18:12Z fabian $
* RCS $Id: clustal-omega.c 304 2016-06-13 13:39:13Z fabian $
*/
#ifdef HAVE_CONFIG_H
......@@ -207,7 +207,8 @@ SetDefaultAlnOpts(opts_t *prOpts) {
prOpts->ppcHMMInput = NULL;
prOpts->iHMMInputFiles = 0;
prOpts->pcHMMBatch = NULL; /* FS, r291 -> */
prOpts->iNumIterations = 0;
prOpts->bIterationsAuto = FALSE;
prOpts->iMaxGuidetreeIterations = INT_MAX;
......@@ -958,7 +959,7 @@ SetAutoOptions(opts_t *prOpts, int iNumSeq) {
int
Align(mseq_t *prMSeq,
mseq_t *prMSeqProfile,
opts_t *prOpts) {
opts_t *prOpts) { /* Note DEVEL 291: at this stage pppcHMMBNames is set but ppiHMMBindex is not */
/* HMM
*/
......@@ -980,6 +981,10 @@ Align(mseq_t *prMSeq,
/* last dAlnScore for iteration */
double dLastAlnScore = -666.666;
/* HMM batch file */
char **ppcHMMbatch = NULL; /* names of unique HMM files */
int iHMMbatch = 0; /* number of unique HMM files */
int i, j; /* aux */
assert(NULL != prMSeq);
......@@ -1027,17 +1032,57 @@ Align(mseq_t *prMSeq,
}
/* Read backgrounds HMMs and store in prHMMs
/* Read backgrounds HMMs and store in prHMMs (Devel 291)
*
*/
if (0 < prOpts->iHMMInputFiles) {
if (NULL != prOpts->pcHMMBatch){
int i, j, k;
for (i = 0; i < prMSeq->nseqs; i++){
if (NULL != prMSeq->pppcHMMBNames[i]){
for (j = 0; NULL != prMSeq->pppcHMMBNames[i][j]; j++){
for (k = 0; k < iHMMbatch; k++){
if (0 == strcmp(ppcHMMbatch[k], prMSeq->pppcHMMBNames[i][j])){
prMSeq->ppiHMMBindex[i][j] = k;
break; /* HMM already registered */
}
} /* went through HMM batch files already identified */
if (k == iHMMbatch){
FILE *pfHMM = NULL;
if (NULL == (pfHMM = fopen(prMSeq->pppcHMMBNames[i][j], "r"))){
prMSeq->ppiHMMBindex[i][j] = -1;
Log(&rLog, LOG_WARN, "Background HMM %s for %s (%d/%d) does not exist",
prMSeq->pppcHMMBNames[i][j], prMSeq->sqinfo[i].name, i, j);
}
else {
fclose(pfHMM); pfHMM = NULL;
ppcHMMbatch = (char **)realloc(ppcHMMbatch, (iHMMbatch+1)*sizeof(char *));
ppcHMMbatch[iHMMbatch] = strdup(prMSeq->pppcHMMBNames[i][j]);
prMSeq->ppiHMMBindex[i][j] = k;
iHMMbatch++;
}
}
} /* j = 0; NULL != prMSeq->pppcHMMBNames[i][j] */
} /* NULL != prMSeq->pppcHMMBNames[i] */
else {
/* void */
}
} /* 0 <= i < prMSeq->nseqs */
} /* there was a HMM batch file */
if (0 < prOpts->iHMMInputFiles) {
int iHMMInfileIndex;
/**
* @warning old structure used to be initialised like this:
* hmm_light rHMM = {0};
*/
prHMMs = (hmm_light *) CKMALLOC(prOpts->iHMMInputFiles * sizeof(hmm_light));
prHMMs = (hmm_light *) CKMALLOC( (prOpts->iHMMInputFiles) * sizeof(hmm_light));
for (iHMMInfileIndex=0; iHMMInfileIndex<prOpts->iHMMInputFiles; iHMMInfileIndex++) {
char *pcHMMInput = prOpts->ppcHMMInput[iHMMInfileIndex];
......@@ -1077,6 +1122,24 @@ Align(mseq_t *prMSeq,
CKFREE(prOpts->ppcHMMInput);
} /* there were background HMM files */
/** read HMMs specific to individual sequences
*/
if (iHMMbatch > 0){
int i;
prHMMs = (hmm_light *) realloc( prHMMs, (prOpts->iHMMInputFiles + iHMMbatch + 1) * sizeof(hmm_light));
for (i = 0; i < iHMMbatch; i++){
char *pcHMMInput = ppcHMMbatch[i];
if (OK != readHMMWrapper(&prHMMs[i + prOpts->iHMMInputFiles], pcHMMInput)){
Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMMInput);
return -1;
}
} /* 0 <= i < iHMMbatch */
} /* there were HMM batch files */
/* If the input ("non-profile") sequences are aligned, then turn
......@@ -1172,6 +1235,9 @@ Align(mseq_t *prMSeq,
if (prOpts->iMaxHMMIterations < 0){
Log(&rLog, LOG_VERBOSE,
"iMaxHMMIterations < 0 (%d), will not perform alignment", prOpts->iMaxHMMIterations);
if (NULL != piOrderLR){
free(piOrderLR); piOrderLR = NULL;
}
return 0;
}
......
......@@ -121,6 +121,8 @@ typedef struct {
/** number of provided HMM input files. not really a user
option but need for ppcHMMInput */
int iHMMInputFiles;
/** HMM batch-file, specify HMMs for individual sequences. FS, r291 -> */
char *pcHMMBatch;
/* Iteration
*/
......
This diff is collapsed.
......@@ -15,7 +15,7 @@
********************************************************************/
/*
* RCS $Id: ktuple_pair.c 230 2011-04-09 15:37:50Z andreas $
* RCS $Id: ktuple_pair.c 305 2016-06-13 13:46:02Z fabian $
*
*
* K-Tuple code for pairwise alignment (Wilbur and Lipman, 1983; PMID
......@@ -649,7 +649,7 @@ KTuplePairDist(symmatrix_t *tmat, mseq_t *mseq,
/* int uStepNo, uTotalStepNo; */
ktuple_param_t aln_param = default_protein_param;
bool bPrintCR = (rLog.iLogLevelEnabled<=LOG_VERBOSE) ? FALSE : TRUE;
if(prProgress == NULL) {
NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO),
......@@ -822,7 +822,9 @@ KTuplePairDist(symmatrix_t *tmat, mseq_t *mseq,
#pragma omp critical(ktuple)
#if 0
{
printf("steps: %d\n", private_step_no);
int tid;
tid = omp_get_thread_num();
printf("%s:%d: tid %d: steps %d\n", __FILE__, __LINE__, tid, private_step_no);
}
#endif
#endif
......
......@@ -15,7 +15,7 @@
********************************************************************/
/*
* RCS $Id: mbed.c 283 2013-06-10 17:42:14Z fabian $
* RCS $Id: mbed.c 300 2016-06-13 13:29:58Z fabian $
*
*
* Reimplementation from scratch of mBed (Blackshields et al., 2010;
......@@ -306,9 +306,9 @@ SeqToVec(double **ppdSeqVec, mseq_t *prMSeq,
int iSeqIndex;
int iSeedIndex;
/* indices for restoring order */
int *restore;
int *restore = NULL;
/* sorted copy of piSeeds */
int *piSortedSeeds;
int *piSortedSeeds = NULL;
#if TIMING
Stopwatch_t *stopwatch = StopwatchCreate();
......@@ -446,6 +446,7 @@ SeqToVec(double **ppdSeqVec, mseq_t *prMSeq,
FreeSymMatrix(&prDistmat);
CKFREE(restore);
CKFREE(piSortedSeeds);
#if TIMING
StopwatchStop(stopwatch);
StopwatchDisplay(stdout, "Total time for SeqToVec(): ", stopwatch);
......@@ -1267,8 +1268,8 @@ Mbed(tree_t **prMbedTree_p, mseq_t *prMSeq, const int iPairDistType,
for (iI = 0; iI < prKMeansResult->iNClusters; iI++) {
for (iJ=0; iJ<prKMeansResult->piNObjsPerCluster[iI]; iJ++) {
int iRealIndex = prKMeansResult->ppiObjIndicesPerCluster[iI][iJ];
fprintf(pfClust, "Cluster %u: object %u has index %u (=seq %s )\t %s\n",
iI, iJ, iRealIndex, prMSeq->sqinfo[iRealIndex].name, ppcClusterSplits[iRealIndex]);
fprintf(pfClust, "Cluster %u: object %u has index %u (=seq %s %d~len)\t %s\n",
iI, iJ, iRealIndex, prMSeq->sqinfo[iRealIndex].name, prMSeq->sqinfo[iRealIndex].len, ppcClusterSplits[iRealIndex]);
}
}
fclose(pfClust); pfClust = NULL;
......
......@@ -15,7 +15,7 @@
********************************************************************/
/*
* RCS $Id: pair_dist.c 288 2013-07-29 13:15:50Z andreas $
* RCS $Id: pair_dist.c 301 2016-06-13 13:32:55Z fabian $
*/
#ifdef HAVE_CONFIG_H
......@@ -35,6 +35,9 @@
#include "progress.h"
#include "util.h"
/* Made iend/jend const unsigned long int (originally just int), FS, 2016-04-04
*/
/* Up to rev 173 we had a USE_SYM_KTUPLE switch implemented here. When active
* ktuple distances were computed twice for each pair and averaged. Idea was
......@@ -57,8 +60,8 @@ KimuraCorrection(double frac_id);
static int
SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq,
int istart, int iend,
int jstart, int jend,
int istart, const unsigned long int iend,
int jstart, const unsigned long int jend,
bool use_KimuraCorrection, progress_t *prProgress,
unsigned long int *ulStepNo, unsigned long int ulTotalStepNo);
......@@ -167,8 +170,8 @@ KimuraCorrection(double p)
*/
int
SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq,
int istart, int iend,
int jstart, int jend,
int istart, const unsigned long int iend,
int jstart, const unsigned long int jend,
bool use_kimura, progress_t *prProgress,
unsigned long int *ulStepNo, unsigned long int ulTotalStepNo)
{
......@@ -272,8 +275,8 @@ SquidIdPairDist(symmatrix_t *tmat, mseq_t *mseq,
*/
int
PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPercID,
int istart, int iend,
int jstart, int jend,
int istart, const unsigned long int iend,
int jstart, const unsigned long int jend,
char *fdist_in, char *fdist_out)
{
int uSeqIndex;
......@@ -315,26 +318,33 @@ PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPerc
hence making even chunk sizes is slightly fiddlier
*/
ulTotalStepNo = iend*jend - iend*iend/2 + iend/2;
/* FIXME: can get rid of iChunkStart, iChunkEnd now that we're using the arrays */
iChunkStart = iend;
for(iChunk = 0; iChunk <= iNumberOfThreads; iChunk++)
{
iChunkEnd = iChunkStart;
if(iChunk == iNumberOfThreads - 1)
if (iChunk == iNumberOfThreads - 1){
iChunkStart = 0;
else
}
else if (iend == jend){
iChunkStart = iend - ((double)(iend - istart) * sqrt(((double)iChunk + 1.0)/(double)iNumberOfThreads));
}
else {
iChunkStart = iend - (iend - istart) * (iChunk + 1) / (double)(iNumberOfThreads);
}
iChunkStarts[iChunk] = iChunkStart;
iChunkEnds[iChunk] = iChunkEnd;
/*printf("%s:%d: C=%d, ie=%d, is=%d, je=%d, js=%d, Cstart=%d, Cend=%d, diff=%d\n",
__FILE__, __LINE__, iChunk, iend, istart, jend, jstart, iChunkStart, iChunkEnd, iChunkEnd-iChunkStart);*/
}
if (PAIRDIST_KTUPLE == pairdist_type) {
Log(&rLog, LOG_INFO, "Calculating pairwise ktuple-distances...");
NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO),
"Ktuple-distance calculation progress", bPrintCR);
"Ktuple-distance calculation progress", bPrintCR);
#ifdef HAVE_OPENMP
#pragma omp parallel for private(iChunk) schedule(dynamic)
#endif
......@@ -394,7 +404,7 @@ PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPerc
}
#endif /* random/proper distance calculation */
/* optional printing of matrix to file
*/
if (NULL != fdist_out) {
......@@ -420,7 +430,7 @@ PairDistances(symmatrix_t **distmat, mseq_t *mseq, int pairdist_type, bool bPerc
ProgressDone(prProgress);
FreeProgress(&prProgress);
}
return 0;
}
/*** end: PairDistances() ***/
......
......@@ -13,7 +13,7 @@
********************************************************************/
/*
* RCS $Id: pair_dist.h 283 2013-06-10 17:42:14Z fabian $
* RCS $Id: pair_dist.h 302 2016-06-13 13:35:50Z fabian $
*/
......@@ -34,8 +34,8 @@
extern int
PairDistances(symmatrix_t **distmat, mseq_t *mseq, const int pairdist_type, bool bPercID,
const int istart, const int iend,
const int jstart, const int jend,
const int istart, const unsigned long int iend,
const int jstart, const unsigned long int jend,
char *fdist_in, char *fdist_out);
#endif
......
......@@ -15,7 +15,7 @@
********************************************************************/
/*
* RCS $Id: seq.c 291 2014-02-27 18:20:54Z fabian $
* RCS $Id: seq.c 298 2014-11-07 12:18:36Z fabian $
*
*
* Module for sequence/alignment IO and misc.
......@@ -37,7 +37,7 @@
#include "util.h"
#include "log.h"
#include "seq.h"
/*#include "../../mymemmonitor.h"*/
#define ALLOW_ONLY_PROTEIN 0 // DD
......@@ -419,11 +419,11 @@ SeqTypeToStr(int iSeqType)
int
ReadSequences(mseq_t *prMSeq, char *seqfile,
int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs,
int iMaxNumSeq, int iMaxSeqLen)
int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch)
{
SQFILE *dbfp; /* sequence file descriptor */
char *cur_seq;
SQINFO cur_sqinfo;
char *cur_seq = NULL;
SQINFO cur_sqinfo = {0};
int iSeqIdx; /* sequence counter */
int iSeqPos; /* sequence string position counter */
......@@ -462,8 +462,8 @@ ReadSequences(mseq_t *prMSeq, char *seqfile,
*/
while (ReadSeq(dbfp, dbfp->format,
&cur_seq,
&cur_sqinfo)) {
&cur_sqinfo)) {
if (prMSeq->nseqs+1>iMaxNumSeq) {
Log(&rLog, LOG_ERROR, "Maximum number of sequences (=%d) exceeded after reading sequence '%s' from '%s'",
iMaxNumSeq, cur_sqinfo.name, seqfile);
......@@ -489,7 +489,8 @@ ReadSequences(mseq_t *prMSeq, char *seqfile,
prMSeq->sqinfo = (SQINFO *)
CKREALLOC(prMSeq->sqinfo, (prMSeq->nseqs+1) * sizeof(SQINFO));
CKREALLOC(prMSeq->sqinfo, (prMSeq->nseqs+1) * sizeof(SQINFO));
memset(&prMSeq->sqinfo[prMSeq->nseqs], 0, sizeof(SQINFO));
SeqinfoCopy(&prMSeq->sqinfo[prMSeq->nseqs], &cur_sqinfo);
#ifdef TRACE
......@@ -549,7 +550,7 @@ ReadSequences(mseq_t *prMSeq, char *seqfile,
prMSeq->nseqs++;
FreeSequence(cur_seq, &cur_sqinfo);
FreeSequence(cur_seq, &cur_sqinfo);
}
SeqfileClose(dbfp);
......@@ -616,6 +617,83 @@ ReadSequences(mseq_t *prMSeq, char *seqfile,
Log(&rLog, LOG_INFO, "Read %d sequences (type: %s) from %s",
prMSeq->nseqs, SeqTypeToStr(prMSeq->seqtype), prMSeq->filename);
prMSeq->pppcHMMBNames = NULL;
prMSeq->ppiHMMBindex = NULL;
/* read HMM-batch file if existent */
if (NULL != pcHMMBatch) {
enum {MAXLINE=10000};
FILE *pfHMMBatch = NULL;
char zcScanline[MAXLINE] = {0};
char *pcToken = NULL;
char *pcSeqName = NULL;
int iSeq = 0;
/* check that file exists */
if (NULL == (pfHMMBatch = fopen(pcHMMBatch, "r"))){
Log(&rLog, LOG_ERROR, "Failed to open HMM-batch file %s for reading", pcHMMBatch);
return -1;
}
/* initialise names and indices */
prMSeq->pppcHMMBNames = (char ***)CKMALLOC(prMSeq->nseqs * sizeof(char **));
for (iSeq = 0; iSeq < prMSeq->nseqs; iSeq++){
prMSeq->pppcHMMBNames[iSeq] = NULL;
}
prMSeq->ppiHMMBindex = (int **)CKMALLOC(prMSeq->nseqs * sizeof(int *));
for (iSeq = 0; iSeq < prMSeq->nseqs; iSeq++){
prMSeq->ppiHMMBindex[iSeq] = (int *)CKMALLOC(1 * sizeof(int));
prMSeq->ppiHMMBindex[iSeq][0] = -1;
}
/* read batch file line-by-line */
while (NULL != fgets(zcScanline, MAXLINE, pfHMMBatch)){
pcToken = strtok(zcScanline, " \040\t\n");
if (NULL == pcToken){
continue;
}
else {
pcSeqName = pcToken;
}
/* identify sequence label from batch file in labels read from sequence file */
for (iSeq = 0; iSeq < prMSeq->nseqs; iSeq++){
int iHMM = 0;
if (0 == strcmp(pcSeqName, prMSeq->sqinfo[iSeq].name)){
while (NULL != (pcToken = strtok(NULL, " \040\t\n"))){
prMSeq->pppcHMMBNames[iSeq] = (char **)CKREALLOC(prMSeq->pppcHMMBNames[iSeq],
(iHMM+2) * sizeof(char *));
prMSeq->pppcHMMBNames[iSeq][iHMM] = CkStrdup(pcToken);
prMSeq->ppiHMMBindex[iSeq] = (int *)CKREALLOC(prMSeq->ppiHMMBindex[iSeq],
(iHMM+2) * sizeof(int));
prMSeq->ppiHMMBindex[iSeq][iHMM] = 0;
iHMM++;
prMSeq->pppcHMMBNames[iSeq][iHMM] = NULL;
prMSeq->ppiHMMBindex[iSeq][iHMM] = 0;
}
break;
}
} /* 0 <= iSeq < prMSeq->nseqs */
if (iSeq >= prMSeq->nseqs) {
Log(&rLog, LOG_WARN,
"sequence %s not found in input sequences (%s), will be ignored",
pcSeqName, seqfile);
}
} /* !EOF */
fclose(pfHMMBatch); pfHMMBatch = NULL;
} /* there was a HMM batch file */
else {
prMSeq->pppcHMMBNames = NULL;
prMSeq->ppiHMMBindex = NULL;
} /* there was no HMM batch file */
return 0;
}
/*** end: ReadSequences ***/
......@@ -644,6 +722,8 @@ NewMSeq(mseq_t **prMSeq)
(*prMSeq)->sqinfo = NULL;
(*prMSeq)->filename = NULL;
(*prMSeq)->tree_order = NULL;
(*prMSeq)->pppcHMMBNames = NULL;
(*prMSeq)->ppiHMMBindex = NULL;
}
/*** end: NewMSeq ***/
......@@ -765,6 +845,14 @@ FreeMSeq(mseq_t **mseq)
CKFREE((*mseq)->tree_order);
}
if (NULL != (*mseq)->pppcHMMBNames){ /* FS, r291 -> */
for (i = 0; (*mseq)->pppcHMMBNames[i] && (i < (*mseq)->nseqs); i++){
int iIter = 0;
for (iIter = 0; NULL != (*mseq)->pppcHMMBNames[i][iIter]; iIter++){
CKFREE((*mseq)->pppcHMMBNames[i][iIter]);
}
}
}
(*mseq)->seqtype = SEQTYPE_UNKNOWN;
(*mseq)->nseqs = 0;
......
......@@ -13,7 +13,7 @@
********************************************************************/
/*
* RCS $Id: seq.h 289 2013-09-17 10:09:37Z fabian $
* RCS $Id: seq.h 296 2014-10-07 12:15:41Z fabian $
*/
#ifndef CLUSTALO_SEQ_H
......@@ -111,6 +111,10 @@ typedef struct {
*
*/
SQINFO *sqinfo;
/* HMM batch information */
char ***pppcHMMBNames;
int **ppiHMMBindex;
} mseq_t;
extern void
......@@ -131,7 +135,7 @@ SeqTypeToStr(int seqtype);
extern int
ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile,
int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs,
int iMaxNumSeq, int iMaxSeqLen);
int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch);
extern void