Skip to content
Commits on Source (6)
......@@ -51,7 +51,7 @@ add_definitions(-DIQ_TREE)
# The version number.
set (iqtree_VERSION_MAJOR 1)
set (iqtree_VERSION_MINOR 6)
set (iqtree_VERSION_PATCH "1")
set (iqtree_VERSION_PATCH "3")
set(BUILD_SHARED_LIBS OFF)
......@@ -277,7 +277,7 @@ if (NOT IQTREE_FLAGS MATCHES "single")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -pthread")
elseif (CLANG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libomp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libomp -pthread")
endif()
else()
message("OpenMP : NONE")
......@@ -435,6 +435,8 @@ check_function_exists (gettimeofday HAVE_GETTIMEOFDAY)
check_function_exists (getrusage HAVE_GETRUSAGE)
check_function_exists (GlobalMemoryStatusEx HAVE_GLOBALMEMORYSTATUSEX)
check_function_exists (strndup HAVE_STRNDUP)
check_function_exists (strtok_r HAVE_STRTOK_R)
find_package(Backtrace)
# configure a header file to pass some of the CMake settings
......
......@@ -3006,15 +3006,29 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq
outError("Unsupported bootstrap feature, pls contact the developers");
}
if (Params::getInstance().jackknife_prop > 0.0 && spec) {
outError((string)"Unsupported jackknife with sampling " + spec);
}
IntVector site_vec;
if (!spec) {
// standard bootstrap
int added_sites = 0;
for (site = 0; site < nsite; site++) {
int site_id = random_int(nsite);
int site_id;
if (Params::getInstance().jackknife_prop == 0.0) {
// bootstrap sampling with replacement
site_id = random_int(nsite);
} else {
// jacknife without replacement
if (random_double() < Params::getInstance().jackknife_prop)
continue;
site_id = site;
}
int ptn_id = aln->getPatternID(site_id);
Pattern pat = aln->at(ptn_id);
int nptn = getNPattern();
addPattern(pat, site);
addPattern(pat, added_sites);
if (!aln->site_state_freq.empty() && getNPattern() > nptn) {
// a new pattern is added, copy state frequency vector
double *state_freq = new double[num_states];
......@@ -3022,7 +3036,10 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq
site_state_freq.push_back(state_freq);
}
if (pattern_freq) ((*pattern_freq)[ptn_id])++;
added_sites++;
}
if (added_sites < nsite)
site_pattern.resize(added_sites);
} else if (strncmp(spec, "GENESITE,", 9) == 0) {
// resampling genes, then resampling sites within resampled genes
convert_int_vec(spec+9, site_vec);
......@@ -3115,6 +3132,9 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in
int site, nsite = getNSite();
memset(pattern_freq, 0, getNPattern()*sizeof(int));
IntVector site_vec;
if (Params::getInstance().jackknife_prop > 0.0 && spec)
outError((string)"Unsupported jackknife with " + spec);
if (!spec || strncmp(spec, "SCALE=", 6) == 0) {
if (spec) {
......@@ -3123,10 +3143,17 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in
}
int nptn = getNPattern();
if (nsite/8 < nptn) {
if (nsite/8 < nptn || Params::getInstance().jackknife_prop > 0.0) {
int orig_nsite = getNSite();
for (site = 0; site < nsite; site++) {
int site_id = random_int(orig_nsite, rstream);
int site_id;
if (Params::getInstance().jackknife_prop == 0.0)
site_id = random_int(orig_nsite, rstream);
else {
if (random_double() < Params::getInstance().jackknife_prop)
continue;
site_id = site;
}
int ptn_id = getPatternID(site_id);
pattern_freq[ptn_id]++;
}
......
......@@ -679,10 +679,54 @@ Alignment *SuperAlignment::concatenateAlignments(set<int> &ids) {
}
Alignment *SuperAlignment::concatenateAlignments() {
set<int> ids;
for (int i = 0; i < partitions.size(); i++)
ids.insert(i);
return concatenateAlignments(ids);
vector<SeqType> seq_types;
vector<set<int> > ids;
for (int i = 0; i < partitions.size(); i++) {
bool found = false;
for (int j = 0; j < seq_types.size(); j++)
if (partitions[i]->seq_type == seq_types[j]) {
ids[j].insert(i);
found = true;
break;
}
if (found)
continue;
// create a new partition
seq_types.push_back(partitions[i]->seq_type);
ids.push_back(set<int>());
ids.back().insert(i);
}
if (seq_types.size() == 1)
return concatenateAlignments(ids[0]);
// mixed data with >= 2 partitions
SuperAlignment *saln = new SuperAlignment();
saln->max_num_states = 0;
// first build taxa_index and partitions
int site, seq, nsite = ids.size();
// BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed
// get the taxa names from existing tree
saln->seq_names = seq_names;
saln->taxa_index.resize(saln->seq_names.size());
for (auto it = saln->taxa_index.begin(); it != saln->taxa_index.end(); it++)
it->resize(nsite, -1);
for (site = 0; site != nsite; site++) {
Alignment *part_aln = concatenateAlignments(ids[site]);
saln->partitions.push_back(part_aln);
int nseq = part_aln->getNSeq();
//cout << "nseq = " << nseq << endl;
for (seq = 0; seq < nseq; seq++) {
int id = saln->getSeqID(part_aln->getSeqName(seq));
ASSERT(id >= 0);
saln->taxa_index[id][site] = seq;
}
}
// now the patterns of sequence-genes presence/absence
saln->buildPattern();
return saln;
}
void SuperAlignment::countConstSite() {
......
iqtree (1.6.3+dfsg-1) unstable; urgency=medium
* New upstream version
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.1.4
-- Andreas Tille <tille@debian.org> Thu, 26 Apr 2018 15:34:19 +0200
iqtree (1.6.1+dfsg-1) unstable; urgency=medium
* New upstream version
......
......@@ -12,9 +12,9 @@ Build-Depends: debhelper (>= 11~),
zlib1g-dev,
help2man,
time
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/iqtree.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/iqtree.git
Standards-Version: 4.1.4
Vcs-Browser: https://salsa.debian.org/med-team/iqtree
Vcs-Git: https://salsa.debian.org/med-team/iqtree.git
Homepage: http://www.cibiv.at/software/iqtree/
Package: iqtree
......
......@@ -12,6 +12,8 @@
/*#cmakedefine HAVE_PCLOSE*/
/* does the platform provide GlobalMemoryStatusEx functions? */
#cmakedefine HAVE_GLOBALMEMORYSTATUSEX
#cmakedefine HAVE_STRNDUP
#cmakedefine HAVE_STRTOK_R
/* does the platform provide backtrace functions? */
#cmakedefine Backtrace_FOUND
......@@ -1031,7 +1031,7 @@ void mainlb(int n, int m, double *x,
--ifun;
--iback;
}
strcpy(task, "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH");
strcpy(task, "WARNING: ABNORMAL_TERMINATION_IN_LNSRCH");
++iter;
goto L999;
} else {
......
......@@ -357,8 +357,10 @@ void reportModel(ofstream &out, Alignment *aln, ModelSubst *m) {
delete[] state_freqs;
out << endl;
}
if (m->num_states <= 4) {
if (m->num_states <= 4 || verbose_mode >= VB_MED) {
// report Q matrix
if (verbose_mode >= VB_MED)
out.precision(6);
double *q_mat = new double[m->num_states * m->num_states];
m->getQMatrix(q_mat);
......@@ -628,18 +630,19 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
if (params.num_bootstrap_samples && params.compute_ml_tree) {
if (params.aLRT_replicates > 0 || params.aLRT_test || params.aBayes_test)
out << " /";
out << " standard bootstrap support (%)";
out << " standard " << ((params.jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << " support (%)";
}
if (params.gbo_replicates) {
if (params.aLRT_replicates > 0 || params.aLRT_test || params.aBayes_test)
out << " /";
out << " ultrafast bootstrap support (%)";
out << " ultrafast " << ((params.jackknife_prop==0.0) ? "bootstrap" : "jackknife") << " support (%)";
}
out << endl;
}
out << endl;
//tree.setExtendedFigChar();
tree.setRootNode(params.root, true);
tree.drawTree(out, WT_BR_SCALE, epsilon);
out << "Tree in newick format:";
......@@ -650,6 +653,7 @@ void reportTree(ofstream &out, Params &params, PhyloTree &tree, double tree_lh,
out << endl << endl;
tree.printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA);
tree.setRootNode(params.root, false);
out << endl << endl;
}
......@@ -774,7 +778,7 @@ void printOutfilesInfo(Params &params, string &original_model, IQTree &tree) {
}
if (params.gbo_replicates) {
cout << endl << "Ultrafast bootstrap approximation results written to:" << endl
cout << endl << "Ultrafast " << ((params.jackknife_prop==0.0) ? "bootstrap" : "jackknife") << " approximation results written to:" << endl
<< " Split support values: " << params.out_prefix << ".splits.nex" << endl
<< " Consensus tree: " << params.out_prefix << ".contree" << endl;
if (params.print_ufboot_trees)
......@@ -857,11 +861,11 @@ void reportPhyloAnalysis(Params &params, string &original_model,
if (params.num_bootstrap_samples > 0) {
if (params.compute_ml_tree)
out << " + ";
out << "non-parametric bootstrap (" << params.num_bootstrap_samples
out << "non-parametric " << ((params.jackknife_prop==0.0) ? "bootstrap" : "jackknife") << " (" << params.num_bootstrap_samples
<< " replicates)";
}
if (params.gbo_replicates > 0) {
out << " + ultrafast bootstrap (" << params.gbo_replicates << " replicates)";
out << " + ultrafast " << ((params.jackknife_prop==0.0) ? "bootstrap" : "jackknife") << " (" << params.gbo_replicates << " replicates)";
}
out << endl;
out << "Random seed number: " << params.ran_seed << endl << endl;
......@@ -1084,7 +1088,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
out << "CONSENSUS TREE" << endl << "--------------" << endl << endl;
out << "Consensus tree is constructed from "
<< (params.num_bootstrap_samples ? params.num_bootstrap_samples : params.gbo_replicates)
<< " bootstrap trees";
<< ((params.jackknife_prop == 0.0) ? " bootstrap" : " jackknife") << " trees";
if (params.gbo_replicates || params.num_bootstrap_samples) {
out << endl << "Log-likelihood of consensus tree: " << fixed << tree.boot_consense_logl;
}
......@@ -1098,7 +1102,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
<< params.contree_rfdist << endl;
// --
out << endl << "Branches with bootstrap support >"
out << endl << "Branches with support >"
<< floor(params.split_threshold * 1000) / 10 << "% are kept";
if (params.split_threshold == 0.0)
out << " (extended consensus)";
......@@ -1108,7 +1112,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
out << " (strict consensus)";
out << endl << "Branch lengths are optimized by maximum likelihood on original alignment" << endl;
out << "Numbers in parentheses are bootstrap supports (%)" << endl << endl;
out << "Numbers in parentheses are " << ((params.jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << " supports (%)" << endl << endl;
bool rooted = false;
MTree contree;
......@@ -1535,6 +1539,7 @@ void initializeParams(Params &params, IQTree &iqtree, ModelCheckpoint &model_inf
double cpu_time = getCPUTime();
double real_time = getRealTime();
model_info.setFileName((string)params.out_prefix + ".model.gz");
model_info.setDumpInterval(params.checkpoint_dump_interval);
bool ok_model_file = false;
if (!params.print_site_lh && !params.model_test_again) {
......@@ -1838,6 +1843,15 @@ void printMiscInfo(Params &params, IQTree &iqtree, double *pattern_lh) {
cout << "Branch lengths written to " << brlen_file << endl;
}
if (params.write_branches) {
string filename = string(params.out_prefix) + ".branches.csv";
ofstream out;
out.open(filename.c_str());
iqtree.writeBranches(out);
out.close();
cout << "Branch lengths written to " << filename << endl;
}
if (params.print_partition_info && iqtree.isSuperTree()) {
string partition_info = params.out_prefix;
partition_info += ".partinfo.nex";
......@@ -1958,12 +1972,13 @@ void printTrees(vector<string> trees, Params &params, string suffix) {
void runTreeReconstruction(Params &params, string &original_model, IQTree* &iqtree, ModelCheckpoint &model_info) {
if (params.root) {
string root_name = params.root;
if (iqtree->aln->getSeqID(root_name) < 0)
outError("Alignment does not have specified outgroup taxon ", params.root);
StrVector outgroup_names;
convert_string_vec(params.root, outgroup_names);
for (auto it = outgroup_names.begin(); it != outgroup_names.end(); it++)
if (iqtree->aln->getSeqID(*it) < 0)
outError("Alignment does not have specified outgroup taxon ", *it);
}
string dist_file;
params.startCPUTime = getCPUTime();
params.start_real_time = getRealTime();
......@@ -2388,7 +2403,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree* &iqtr
/****** perform SH-aLRT test ******************/
if ((params.aLRT_replicates > 0 || params.localbp_replicates > 0 || params.aLRT_test || params.aBayes_test) && !params.pll) {
double mytime = getCPUTime();
double mytime = getRealTime();
params.aLRT_replicates = max(params.aLRT_replicates, params.localbp_replicates);
cout << endl;
if (params.aLRT_replicates > 0)
......@@ -2404,7 +2419,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree* &iqtr
if (iqtree->isBifurcating()) {
iqtree->testAllBranches(params.aLRT_threshold, iqtree->getCurScore(),
pattern_lh, params.aLRT_replicates, params.localbp_replicates, params.aLRT_test, params.aBayes_test);
cout << "CPU Time used: " << getCPUTime() - mytime << " sec." << endl;
cout << getRealTime() - mytime << " sec." << endl;
} else {
outWarning("Tree is multifurcating and such test is not applicable");
params.aLRT_replicates = params.localbp_replicates = params.aLRT_test = params.aBayes_test = 0;
......@@ -2713,7 +2728,7 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
// do bootstrap analysis
for (int sample = bootSample; sample < params.num_bootstrap_samples; sample++) {
cout << endl << "===> START BOOTSTRAP REPLICATE NUMBER "
cout << endl << "===> START " << ((params.jackknife_prop==0.0) ? "BOOTSTRAP" : "JACKKNIFE") << " REPLICATE NUMBER "
<< sample + 1 << endl << endl;
// 2015-12-17: initialize random stream for creating bootstrap samples
......@@ -2722,7 +2737,11 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
init_random(params.ran_seed + sample);
Alignment* bootstrap_alignment;
if (Params::getInstance().jackknife_prop == 0.0)
cout << "Creating bootstrap alignment (seed: " << params.ran_seed+sample << ")..." << endl;
else
cout << "Creating jackknife alignment (seed: " << params.ran_seed+sample << ")..." << endl;
if (alignment->isSuperAlignment())
bootstrap_alignment = new SuperAlignment;
else
......@@ -2751,8 +2770,17 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
} else {
boot_tree = new PhyloSuperTree((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) tree);
}
} else {
// allocate heterotachy tree if neccessary
int pos = posRateHeterotachy(params.model_name);
if (params.num_mixlen > 1) {
boot_tree = new PhyloTreeMixlen(bootstrap_alignment, params.num_mixlen);
} else if (pos != string::npos) {
boot_tree = new PhyloTreeMixlen(bootstrap_alignment, 0);
} else
boot_tree = new IQTree(bootstrap_alignment);
}
if (params.print_bootaln && MPIHelper::getInstance().isMaster()) {
if (bootstrap_alignment->isSuperAlignment())
((SuperAlignment*)bootstrap_alignment)->printCombinedAlignment(bootaln_name.c_str(), true);
......@@ -2830,8 +2858,8 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
if (params.consensus_type == CT_CONSENSUS_TREE && MPIHelper::getInstance().isMaster()) {
cout << endl << "===> COMPUTE CONSENSUS TREE FROM "
<< params.num_bootstrap_samples << " BOOTSTRAP TREES" << endl << endl;
cout << endl << "===> COMPUTE CONSENSUS TREE FROM " << params.num_bootstrap_samples
<< ((params.jackknife_prop==0.0) ? " BOOTSTRAP" : " JACKKNIFE") << " TREES" << endl << endl;
string root_name = (params.root) ? params.root : alignment->getSeqName(0);
const char* saved_root = params.root;
params.root = root_name.c_str();
......@@ -2856,7 +2884,8 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
optimizeConTree(params, tree);
}
cout << endl << "===> ASSIGN BOOTSTRAP SUPPORTS TO THE TREE FROM ORIGINAL ALIGNMENT" << endl << endl;
cout << endl << "===> ASSIGN " << ((params.jackknife_prop==0.0) ? " BOOTSTRAP" : " JACKKNIFE")
<< " SUPPORTS TO THE TREE FROM ORIGINAL ALIGNMENT" << endl << endl;
MExtTree ext_tree;
assignBootstrapSupport(boottrees_name.c_str(), 0, 1e6,
treefile_name.c_str(), false, treefile_name.c_str(),
......@@ -2879,12 +2908,12 @@ void runStandardBootstrap(Params &params, string &original_model, Alignment *ali
cout << endl;
if (MPIHelper::getInstance().isMaster()) {
cout << "Total CPU time for bootstrap: " << (getCPUTime() - start_time) << " seconds." << endl;
cout << "Total wall-clock time for bootstrap: " << (getRealTime() - start_real_time) << " seconds." << endl << endl;
cout << "Non-parametric bootstrap results written to:" << endl;
cout << "Total CPU time for " << ((params.jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << ": " << (getCPUTime() - start_time) << " seconds." << endl;
cout << "Total wall-clock time for " << ((params.jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << ": " << (getRealTime() - start_real_time) << " seconds." << endl << endl;
cout << "Non-parametric " << ((params.jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << " results written to:" << endl;
if (params.print_bootaln)
cout << " Bootstrap alignments: " << params.out_prefix << ".bootaln" << endl;
cout << " Bootstrap trees: " << params.out_prefix << ".boottrees" << endl;
cout << ((params.jackknife_prop == 0.0) ? " Bootstrap" : " Jackknife") << " alignments: " << params.out_prefix << ".bootaln" << endl;
cout << ((params.jackknife_prop == 0.0) ? " Bootstrap" : " Jackknife") << " trees: " << params.out_prefix << ".boottrees" << endl;
if (params.consensus_type == CT_CONSENSUS_TREE)
cout << " Consensus tree: " << params.out_prefix << ".contree" << endl;
cout << endl;
......@@ -2897,7 +2926,10 @@ void convertAlignment(Params &params, IQTree *iqtree) {
if (params.num_bootstrap_samples || params.print_bootaln) {
// create bootstrap alignment
Alignment* bootstrap_alignment;
if (Params::getInstance().jackknife_prop == 0.0)
cout << "Creating bootstrap alignment..." << endl;
else
cout << "Creating jackknife alignment..." << endl;
if (alignment->isSuperAlignment())
bootstrap_alignment = new SuperAlignment;
else
......@@ -3173,7 +3205,7 @@ void runPhyloAnalysis(Params &params, Checkpoint *checkpoint) {
if (params.print_ufboot_trees)
tree->writeUFBootTrees(params);
cout << endl << "Computing bootstrap consensus tree..." << endl;
cout << endl << "Computing " << ((params.jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << " consensus tree..." << endl;
string splitsfile = params.out_prefix;
splitsfile += ".splits.nex";
double weight_threshold = (params.split_threshold<1) ? params.split_threshold : (params.gbo_replicates-1.0)/params.gbo_replicates;
......@@ -3333,7 +3365,7 @@ void assignBootstrapSupport(const char *input_trees, int burnin, int max_count,
// compute the percentage of appearance
// printSplitSet(sg, hash_ss);
//sg.report(cout);
cout << "Creating bootstrap support values..." << endl;
cout << "Creating " << ((params->jackknife_prop == 0.0) ? "bootstrap" : "jackknife") << " support values..." << endl;
mytree.createBootstrapSupport(taxname, boot_trees, sg, hash_ss, params->support_tag);
//mytree.scaleLength(100.0/boot_trees.size(), true);
string out_file;
......@@ -3348,7 +3380,7 @@ void assignBootstrapSupport(const char *input_trees, int burnin, int max_count,
}
mytree.printTree(out_file.c_str());
cout << "Tree with assigned bootstrap support written to " << out_file
cout << "Tree with assigned support written to " << out_file
<< endl;
/*
if (out_prefix)
......
......@@ -36,6 +36,7 @@
#include "model/modelpomo.h"
#include "utils/timeutil.h"
#include "model/modelfactorymixlen.h"
#include "tree/phylosupertreeplen.h"
#include "phyloanalysis.h"
#include "gsl/mygsl.h"
......@@ -146,6 +147,31 @@ const char *codon_freq_names[] = {"", "+F1X4", "+F3X4", "+F"};
const double TOL_LIKELIHOOD_MODELTEST = 0.1;
const double TOL_GRADIENT_MODELTEST = 0.0001;
string getSeqTypeName(SeqType seq_type) {
switch (seq_type) {
case SEQ_BINARY: return "binary";
case SEQ_DNA: return "DNA";
case SEQ_PROTEIN: return "protein";
case SEQ_CODON: return "codon";
case SEQ_MORPH: return "morphological";
case SEQ_POMO: return "PoMo";
case SEQ_UNKNOWN: return "unknown";
case SEQ_MULTISTATE: return "MultiState";
}
}
string getUsualModelName(SeqType seq_type) {
switch (seq_type) {
case SEQ_DNA: return "GTR+F+G";
case SEQ_PROTEIN: return "LG+F+G";
case SEQ_CODON: return "GY"; break; // too much computation, thus no +G
case SEQ_BINARY: return "GTR2+G";
case SEQ_MORPH: return "MK+G";
case SEQ_POMO: return "GTR+P";
default: ASSERT(0 && "Unprocessed seq_type"); return "";
}
}
void ModelInfo::computeICScores(size_t sample_size) {
computeInformationScores(logl, df, sample_size, AIC_score, AICc_score, BIC_score);
}
......@@ -1249,7 +1275,17 @@ string testOneModel(string &model_name, Params &params, Alignment *in_aln,
int &num_threads, int brlen_type)
{
IQTree *iqtree = NULL;
if (posRateHeterotachy(model_name) != string::npos)
if (in_aln->isSuperAlignment()) {
SuperAlignment *saln = (SuperAlignment*)in_aln;
if (brlen_type == BRLEN_OPTIMIZE)
iqtree = new PhyloSuperTree(saln);
else
iqtree = new PhyloSuperTreePlen(saln);
StrVector model_names;
convert_string_vec(model_name.c_str(), model_names);
for (int part = 0; part != model_names.size(); part++)
((PhyloSuperTree*)iqtree)->part_info[part].model_name = model_names[part];
} else if (posRateHeterotachy(model_name) != string::npos)
iqtree = new PhyloTreeMixlen(in_aln, 0);
else
iqtree = new IQTree(in_aln);
......@@ -1312,6 +1348,14 @@ string testOneModel(string &model_name, Params &params, Alignment *in_aln,
iqtree->saveCheckpoint();
}
#ifdef _OPENMP
if (num_threads <= 0) {
num_threads = iqtree->testNumThreads();
omp_set_num_threads(num_threads);
} else
iqtree->warnNumThreads();
#endif
runTreeReconstruction(params, original_model, iqtree, model_info);
info.logl = iqtree->computeLikelihood();
info.tree_len = iqtree->treeLength();
......@@ -1440,12 +1484,45 @@ public:
};
string testConcatModel(Params &params, SuperAlignment *super_aln, ModelCheckpoint &model_info,
ModelsBlock *models_block, int num_threads, ModelInfo &concat_info)
{
Alignment *conaln = super_aln->concatenateAlignments();
string model_name;
int ssize = 0;
if (conaln->isSuperAlignment()) {
SuperAlignment *sconaln = (SuperAlignment*)conaln;
for (auto it = sconaln->partitions.begin(); it != sconaln->partitions.end(); it++) {
if (!model_name.empty())
model_name += ",";
model_name += getUsualModelName((*it)->seq_type);
ssize += (*it)->getNSite();
}
} else {
model_name = getUsualModelName(conaln->seq_type);
ssize = conaln->getNSite();
}
string concat_tree;
cout << "Testing " << model_name << " on supermatrix..." << endl;
concat_tree = testOneModel(model_name, params, conaln, model_info, concat_info,
models_block, num_threads, BRLEN_OPTIMIZE);
concat_info.computeICScores(ssize);
concat_info.saveCheckpoint(&model_info);
delete conaln;
return concat_tree;
}
/**
* select models for all partitions
* @param[in,out] model_info (IN/OUT) all model information
* @return total number of parameters
*/
void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint &model_info, ModelsBlock *models_block, int num_threads) {
void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint &model_info,
ModelsBlock *models_block, int num_threads)
{
// params.print_partition_info = true;
// params.print_conaln = true;
int i = 0;
......@@ -1483,6 +1560,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint
if (num_threads <= 0) {
// partition selection scales well with many cores
num_threads = min((int64_t)countPhysicalCPUCores(), total_num_model);
num_threads = min(num_threads, params.num_threads_max);
omp_set_num_threads(num_threads);
cout << "NUMBER OF THREADS FOR PARTITION FINDING: " << num_threads << endl;
}
......@@ -1497,24 +1575,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint
// Analysis on supermatrix
{
Alignment *conaln = super_aln->concatenateAlignments();
string model_name;
switch (conaln->seq_type) {
case SEQ_DNA: model_name = "GTR+F+G"; break;
case SEQ_PROTEIN: model_name = "LG+F+G"; break;
case SEQ_CODON: model_name = "GY"; break; // too much computation, thus no +G
case SEQ_BINARY: model_name = "GTR2+G"; break;
case SEQ_MORPH: model_name = "MK+G"; break;
case SEQ_POMO: model_name = "GTR+P"; break;
default: ASSERT(0 && "Unprocessed seq_type");
}
cout << "Testing " << model_name << " on supermatrix..." << endl;
concat_tree = testOneModel(model_name, params, conaln, model_info, concat_info,
models_block, num_threads, BRLEN_OPTIMIZE);
concat_info.computeICScores(ssize);
concat_info.saveCheckpoint(&model_info);
concat_tree = testConcatModel(params, super_aln, model_info, models_block, num_threads, concat_info);
// read tree with branch lengths for linked partition model
if (params.partition_type != BRLEN_OPTIMIZE && !concat_tree.empty()) {
......@@ -1531,7 +1592,6 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint
cout << concat_info.name << " / LnL: " << concat_info.logl
<< " / df: " << concat_info.df << " / AIC: " << concat_info.AIC_score
<< " / AICc: " << concat_info.AICc_score << " / BIC: " << concat_info.BIC_score << endl;
delete conaln;
}
cout << "Selecting individual models for " << in_tree->size() << " charsets using " << criterionName(params.model_test_criterion) << "..." << endl;
......@@ -1700,6 +1760,9 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint
}
ModelInfo best_model;
bool done_before = false;
#ifdef _OPENMP
#pragma omp critical
#endif
{
// if pairs previously examined, reuse the information
model_info.startStruct(cur_pair.set_name);
......@@ -1720,10 +1783,11 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint
tree->sse = params.SSE;
tree->optimize_by_newton = params.optimize_by_newton;
tree->num_threads = params.model_test_and_tree ? num_threads : 1;
if (params.model_test_and_tree) {
/*if (params.model_test_and_tree) {
tree->setCheckpoint(new Checkpoint());
tree->saveCheckpoint();
} else {
} else*/
{
tree->setCheckpoint(&part_model_info);
// trick to restore checkpoint
tree->restoreCheckpoint();
......@@ -1732,9 +1796,11 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, ModelCheckpoint
best_model.name = testModel(params, tree, part_model_info, models_block,
params.model_test_and_tree ? num_threads : 1, params.partition_type, cur_pair.set_name);
best_model.restoreCheckpoint(&part_model_info);
/*
if (params.model_test_and_tree) {
delete tree->getCheckpoint();
}
*/
delete tree;
delete aln;
}
......@@ -1936,18 +2002,6 @@ ModelMarkov* getPrototypeModel(SeqType seq_type, PhyloTree* tree, char *model_se
return(subst_model);
}
*/
string getSeqTypeName(SeqType seq_type) {
switch (seq_type) {
case SEQ_BINARY: return "binary";
case SEQ_DNA: return "DNA";
case SEQ_PROTEIN: return "protein";
case SEQ_CODON: return "codon";
case SEQ_MORPH: return "morphological";
case SEQ_POMO: return "PoMo";
case SEQ_UNKNOWN: return "unknown";
case SEQ_MULTISTATE: return "MultiState";
}
}
string testModel(Params &params, PhyloTree* in_tree, ModelCheckpoint &model_info, ModelsBlock *models_block,
......@@ -2958,8 +3012,11 @@ void evaluateTrees(Params &params, IQTree *tree, vector<TreeInfo> &info, IntVect
continue;
}
tree->freeNode();
bool rooted = tree->rooted;
tree->readTree(in, rooted);
tree->readTree(in, tree->rooted);
if (tree->rooted && tree->getModel()->isReversible())
tree->convertToUnrooted();
else if (!tree->rooted && !tree->getModel()->isReversible())
tree->convertToRooted();
tree->setAlignment(tree->aln);
tree->setRootNode(params.root);
if (tree->isSuperTree())
......
......@@ -47,12 +47,12 @@
string::size_type posRateHeterotachy(string &model_name) {
string::size_type pos1 = 0, pos2 = 0;
do {
pos1 = model_name.find("+H", pos1);
pos1 = model_name.find("+H", pos1+1);
if (pos1 == string::npos) break;
} while (pos1 < model_name.length()-2 && isalpha(model_name[pos1+2]));
do {
pos2 = model_name.find("*H");
pos2 = model_name.find("*H", pos2+1);
if (pos2 == string::npos) break;
} while (pos2 < model_name.length()-2 && isalpha(model_name[pos2+2]));
......@@ -68,12 +68,12 @@ string::size_type posRateHeterotachy(string &model_name) {
string::size_type posRateFree(string &model_name) {
string::size_type pos1 = 0, pos2 = 0;
do {
pos1 = model_name.find("+R", pos1);
pos1 = model_name.find("+R", pos1+1);
if (pos1 == string::npos) break;
} while (pos1 < model_name.length()-2 && isalpha(model_name[pos1+2]));
do {
pos2 = model_name.find("*R");
pos2 = model_name.find("*R", pos2+1);
if (pos2 == string::npos) break;
} while (pos2 < model_name.length()-2 && isalpha(model_name[pos2+2]));
......@@ -89,12 +89,12 @@ string::size_type posRateFree(string &model_name) {
string::size_type posPOMO(string &model_name) {
string::size_type pos1 = 0, pos2 = 0;
do {
pos1 = model_name.find("+P", pos1);
pos1 = model_name.find("+P", pos1+1);
if (pos1 == string::npos) break;
} while (pos1 < model_name.length()-2 && isalpha(model_name[pos1+2]));
do {
pos2 = model_name.find("*P");
pos2 = model_name.find("*P", pos2+1);
if (pos2 == string::npos) break;
} while (pos2 < model_name.length()-2 && isalpha(model_name[pos2+2]));
......@@ -120,12 +120,21 @@ ModelsBlock *readModelsDefinition(Params &params) {
nexus.Add(models_block);
MyToken token(in);
nexus.Execute(token);
// int num_model = 0, num_freq = 0;
// for (ModelsBlock::iterator it = models_block->begin(); it != models_block->end(); it++)
// if ((*it).flag & NM_FREQ) num_freq++; else num_model++;
// cout << num_model << " models and " << num_freq << " frequency vectors loaded" << endl;
} catch (...) {
ASSERT(0 && "predefined mixture models initialized");
ASSERT(0 && "predefined mixture models not initialized");
}
try
{
// loading internal protei model definitions
stringstream in(builtin_prot_models);
ASSERT(in && "stringstream is OK");
NxsReader nexus;
nexus.Add(models_block);
MyToken token(in);
nexus.Execute(token);
} catch (...) {
ASSERT(0 && "predefined protein models not initialized");
}
if (params.model_def_file) {
......@@ -136,7 +145,7 @@ ModelsBlock *readModelsDefinition(Params &params) {
nexus.Execute(token);
int num_model = 0, num_freq = 0;
for (ModelsBlock::iterator it = models_block->begin(); it != models_block->end(); it++)
if ((*it).flag & NM_FREQ) num_freq++; else num_model++;
if (it->second.flag & NM_FREQ) num_freq++; else num_model++;
cout << num_model << " models and " << num_freq << " frequency vectors loaded" << endl;
}
return models_block;
......@@ -1260,8 +1269,6 @@ double ModelFactory::optimizeParameters(int fixed_len, bool write_info,
}
}
if (verbose_mode >= VB_MED || write_info)
cout << "Optimal log-likelihood: " << cur_lh << endl;
......
......@@ -22,6 +22,8 @@
#include <string.h>
#include "modelliemarkov.h"
#include "modelunrest.h"
#include <Eigen/Eigenvalues>
using namespace Eigen;
/** number of squaring for scaling-squaring technique */
......@@ -88,7 +90,7 @@ void ModelMarkov::setReversible(bool reversible) {
num_params = nrate - 1;
if (phylo_tree->rooted) {
if (phylo_tree && phylo_tree->rooted) {
cout << "Converting rooted to unrooted tree..." << endl;
phylo_tree->convertToUnrooted();
}
......@@ -118,7 +120,7 @@ void ModelMarkov::setReversible(bool reversible) {
if (!cinv_evec)
cinv_evec = aligned_alloc<complex<double> >(num_states*num_states);
if (!phylo_tree->rooted) {
if (phylo_tree && !phylo_tree->rooted) {
cout << "Converting unrooted to rooted tree..." << endl;
phylo_tree->convertToRooted();
}
......@@ -947,6 +949,19 @@ void ModelMarkov::decomposeRateMatrix(){
}
}
delete [] q;
/* } else if (Params::getInstance().matrix_exp_technique == MET_EIGEN3LIB_DECOMPOSITION) {
// Using Eigen3 libary for general time-reversible model
Eigen::MatrixXd Q;
Q.resize(num_states, num_states);
if (half_matrix) {
for (i = 0, k = 0; i < num_states; i++)
for (j = i+1; j < num_states; j++, k++) {
Q(i,j) = Q(j,i) = rates[k];
}
} else {
// full matrix
}
*/
} else {
// general reversible model
......@@ -1069,12 +1084,31 @@ void ModelMarkov::readStateFreq(string str) throw(const char*) {
}
void ModelMarkov::readParameters(const char *file_name) {
if (!fileExists(file_name))
outError("File not found ", file_name);
cout << "Reading model parameters from file " << file_name << endl;
// if detect if reading full matrix or half matrix by the first entry
try {
ifstream in(file_name);
double d;
in >> d;
if (d < 0) {
setReversible(false);
} else
setReversible(true);
in.close();
}
catch (...) {
outError(ERR_READ_ANY, file_name);
}
try {
ifstream in(file_name);
if (in.fail()) {
outError("Invalid model name ", file_name);
}
cout << "Reading model parameters from file " << file_name << endl;
readRates(in);
readStateFreq(in);
in.close();
......@@ -1084,6 +1118,51 @@ void ModelMarkov::readParameters(const char *file_name) {
}
num_params = 0;
writeInfo(cout);
if (!is_reversible) {
// check consistency of state_freq
double saved_state_freq[num_states];
memcpy(saved_state_freq, state_freq, sizeof(double)*num_states);
decomposeRateMatrix();
for (int i = 0; i < num_states; i++)
if (fabs(state_freq[i] - saved_state_freq[i]) > 1e-3)
cout << "WARNING: State " << i << " frequency " << state_freq[i]
<< " does not match " << saved_state_freq[i] << endl;
}
}
void ModelMarkov::readParametersString(string &model_str) {
// if detect if reading full matrix or half matrix by the first entry
int end_pos;
double d = 0.0;
d = convert_double(model_str.c_str(), end_pos);
if (d < 0) {
setReversible(false);
} else
setReversible(true);
try {
stringstream in(model_str);
readRates(in);
readStateFreq(in);
}
catch (const char *str) {
outError(str);
}
num_params = 0;
writeInfo(cout);
if (!is_reversible) {
// check consistency of state_freq
double saved_state_freq[num_states];
memcpy(saved_state_freq, state_freq, sizeof(double)*num_states);
decomposeRateMatrix();
for (int i = 0; i < num_states; i++)
if (fabs(state_freq[i] - saved_state_freq[i]) > 1e-3)
cout << "WARNING: State " << i << " frequency " << state_freq[i]
<< " does not match " << saved_state_freq[i] << endl;
}
}
......
......@@ -164,10 +164,15 @@ public:
/**
read model parameters from a file
@param file_name file containing upper-triangle rate matrix and state frequencies
@param file_name file containing rate matrix and state frequencies
*/
void readParameters(const char *file_name);
/**
read model parameters from a string
@param model_str string containing rate matrix and state frequencies
*/
void readParametersString(string &model_str);
/**
compute the transition probability matrix.
......
Source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -17,7 +17,7 @@
const char OPEN_BRACKET = '{';
const char CLOSE_BRACKET = '}';
extern const string builtin_mixmodels_definition;
extern const char* builtin_mixmodels_definition;
/**
* create a substitution model
......
......@@ -17,21 +17,26 @@ void ModelMorphology::init(const char *model_name, string model_params, StateFre
{
name = model_name;
full_name = model_name;
freq = FREQ_EQUAL;
if (name == "MK") {
// all were initialized
num_params = 0;
} else if (name == "ORDERED") {
int k = 0;
int i, j, k = 0;
// only allow for substitution from state i to state i+1 and back.
for (int i = 0; i < num_states-1; i++) {
for (i = 0; i < num_states-1; i++) {
rates[k++] = 1.0;
for (int j = i+2; j < num_states; j++, k++)
for (j = i+2; j < num_states; j++, k++)
rates[k] = 0.0;
}
num_params = 0;
} else if (name == "GTR") {
outWarning("GTR multistate model will estimate " + convertIntToString(getNumRateEntries()-1) + " substitution rates that might be overfitting!");
outWarning("Please only use GTR with very large data and always test for model fit!");
} else {
// if name does not match, read the user-defined model
readParameters(model_name);
num_params = 0;
freq = FREQ_USER_DEFINED;
}
ModelMarkov::init(freq);
}
......@@ -57,6 +62,12 @@ void ModelMorphology::readRates(istream &in) throw(const char*, string) {
}
}
int ModelMorphology::getNDim() {
int ndim = num_params;
if (freq_type == FREQ_ESTIMATE)
ndim += num_states-1;
return ndim;
}
ModelMorphology::~ModelMorphology() {
}
......@@ -64,3 +75,64 @@ ModelMorphology::~ModelMorphology() {
void ModelMorphology::startCheckpoint() {
checkpoint->startStruct("ModelMorph");
}
void ModelMorphology::saveCheckpoint() {
startCheckpoint();
if (num_params > 0)
CKP_ARRAY_SAVE(getNumRateEntries(), rates);
endCheckpoint();
ModelMarkov::saveCheckpoint();
}
void ModelMorphology::restoreCheckpoint() {
ModelMarkov::restoreCheckpoint();
startCheckpoint();
if (num_params > 0)
CKP_ARRAY_RESTORE(getNumRateEntries(), rates);
endCheckpoint();
decomposeRateMatrix();
if (phylo_tree)
phylo_tree->clearAllPartialLH();
}
string ModelMorphology::getNameParams() {
if (num_params == 0) return name;
ostringstream retname;
retname << name << '{';
int nrates = getNumRateEntries();
for (int i = 0; i < nrates; i++) {
if (i>0) retname << ',';
retname << rates[i];
}
retname << '}';
getNameParamsFreq(retname);
return retname.str();
}
void ModelMorphology::writeParameters(ostream &out) {
int i;
if (freq_type == FREQ_ESTIMATE) {
for (i = 0; i < num_states; i++)
out << "\t" << state_freq[i];
}
if (num_params == 0) return;
int nrateout = getNumRateEntries() - 1;
for (i = 0; i < nrateout; i++)
out << "\t" << rates[i];
}
void ModelMorphology::writeInfo(ostream &out) {
if (num_params > 0) {
out << "Rate parameters:";
int nrate = getNumRateEntries();
for (int i = 0; i < nrate; i++)
out << " " << rates[i];
out << endl;
}
if (freq_type != FREQ_EQUAL) {
out << "State frequencies:";
for (int i = 0; i < num_states; i++)
out << " " << state_freq[i];
out << endl;
}
}
......@@ -34,15 +34,42 @@ public:
*/
virtual void init(const char *model_name, string model_params, StateFreqType freq, string freq_params);
/**
return the number of dimensions
*/
virtual int getNDim();
/**
start structure for checkpointing
*/
virtual void startCheckpoint();
/**
return the number of dimensions
save object into the checkpoint
*/
virtual void saveCheckpoint();
/**
restore object from the checkpoint
*/
virtual void restoreCheckpoint();
/**
* @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
*/
virtual string getNameParams();
/**
write information
@param out output stream
*/
virtual void writeInfo(ostream &out);
/**
write parameters, used with modeltest
@param out output stream
*/
virtual int getNDim() { return 0; }
virtual void writeParameters(ostream &out);
/**
read the rates from an input stream. it will throw error messages if failed
......
This diff is collapsed.
......@@ -22,6 +22,8 @@
#include "modelmarkov.h"
extern const char* builtin_prot_models;
/**
Substitution models for protein sequences
......@@ -36,7 +38,7 @@ public:
@param freq state frequency type
@param tree associated phylogenetic tree
*/
ModelProtein(const char *model_name, string model_params, StateFreqType freq, string freq_params, PhyloTree *tree);
ModelProtein(const char *model_name, string model_params, StateFreqType freq, string freq_params, PhyloTree *tree, ModelsBlock *models_block);
/**
initialization, called automatically by the constructor, no need to call it
......@@ -71,6 +73,8 @@ public:
*/
virtual string getNameParams() { return name; }
private:
ModelsBlock *models_block;
};
......
......@@ -7,7 +7,7 @@
#include "modelsblock.h"
ModelsBlock::ModelsBlock() : NxsBlock(), vector<NxsModel>()
ModelsBlock::ModelsBlock() : NxsBlock(), unordered_map<string, NxsModel>()
{
id = "MODELS";
}
......@@ -51,7 +51,7 @@ void ModelsBlock::Read(NxsToken &token)
model.flag |= (NM_ATOMIC*(model.description.find_first_of("+*") == string::npos && model.description.find("MIX") == string::npos));
push_back(model);
insert({model.name, model});
} else if (token.Equals("END") || token.Equals("ENDBLOCK")) {
// Get the semicolon following END
......@@ -73,18 +73,16 @@ void ModelsBlock::Read(NxsToken &token)
}
}
NxsModel *ModelsBlock::findModel(string &name) {
for (iterator it = begin(); it != end(); it++)
if (it->name == name) return &(*it);
return NULL;
NxsModel *ModelsBlock::findModel(string name) {
iterator it = find(name);
if (it == end()) return NULL;
return &(it->second);
}
NxsModel *ModelsBlock::findMixModel(string &name) {
for (iterator it = begin(); it != end(); it++)
if (it->name == name) {
if ((it->flag & NM_ATOMIC) == 0)
return &(*it);
else return NULL;
}
NxsModel *ModelsBlock::findMixModel(string name) {
NxsModel *model = findModel(name);
if (!model) return NULL;
if (model->flag & NM_ATOMIC)
return NULL;
return model;
}
......@@ -9,9 +9,11 @@
#define MODELSBLOCK_H_
#include "ncl/ncl.h"
#include "utils/tools.h"
const int NM_ATOMIC = 1; // NxsModel is not mixture or +G etc. model
const int NM_FREQ = 2; // NxsModel contains state frequency
const int NM_PROTEIN = 4; // NxsModel contains emprical protein model
class NxsModel {
public:
......@@ -39,7 +41,7 @@ public:
/**
* Class to parse MODELS block in NEXUS file
*/
class ModelsBlock: public NxsBlock, public vector<NxsModel> {
class ModelsBlock: public NxsBlock, public unordered_map<string, NxsModel> {
public:
/** constructor */
ModelsBlock();
......@@ -50,13 +52,13 @@ public:
@param name model name
@return pointer to model with the name or NULL if not found
*/
NxsModel *findModel(string &name);
NxsModel *findModel(string name);
/**
@param name model name
@return pointer to a mixed model with the name or NULL if not found
*/
NxsModel *findMixModel(string &name);
NxsModel *findMixModel(string name);
protected:
......