Skip to content
Commits on Source (5)
paml (4.9j+dfsg-1) unstable; urgency=medium
* New upstream version
* Also cleaning binaries after build
* Standards-Version: 4.4.1
-- Steffen Moeller <moeller@debian.org> Sun, 10 Nov 2019 18:54:50 +0100
paml (4.9i+dfsg-1) unstable; urgency=medium
* New upstream version
......
......@@ -6,7 +6,7 @@ Uploaders: Pjotr Prins <pjotr.debian@thebird.nl>,
Section: science
Priority: optional
Build-Depends: debhelper-compat (= 12)
Standards-Version: 4.4.0
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/paml
Vcs-Git: https://salsa.debian.org/med-team/paml.git
Homepage: http://abacus.gene.ucl.ac.uk/software/paml.html
......
......@@ -39,3 +39,8 @@ override_dh_link-arch:
override_dh_fixperms-arch:
dh_fixperms -a
find debian/*/usr/lib/paml/data -type f -exec chmod -x \{\} \;
override_dh_auto_clean:
dh_auto_clean
rm -f src/baseml src/basemlg src/chi2 src/codeml src/evolver src/infinitesites src/mcmctree src/pamp src/yn00
......@@ -8,6 +8,31 @@ discussion site
https://groups.google.com/forum/#!forum/pamlsoftware.
Version 4.9j, October 2019
(*) mcmctree, initial values have ages that are too old, exceeding max
bounds. In theory bounds in mcmctree are always soft so that the node
ages will move to the area of posterior mode when burnin is long
enough. In practice the poor starting values are problematic and
requires long burn-in. I have rewritten the code for generating
initial values to respect the min and max bounds specified in the
fossil calibrations.
(*) codeml, clade labels in the tree file. A bug introduced in 4.9i
caused the clade labels ($) to be ignored. This affects the branch,
branch-site and clade models. If your tree has branch labels (#) only
and no clade models, everything will be fine. If you have used the
clade label ($) in the tree with or without the branch label (#),
either the program will abort or the results will be incorrect. The
clade label ($) is supposed to label all branches in the clade as well
as the branch itself, but all clade labels in the tree are ignored in
4.9i. Earlier versions are correct.
Thanks to Yonghua Wu for pointing out the bug.
Version 4.9i, March 2019
(*) mcmctree: I have added an option (duplication = 1) for dating a
......
This diff is collapsed.
......@@ -272,7 +272,7 @@ int main(int argc, char *argv[])
if ((k = stree.nodes[i].fossil) == 0) continue;
printf("Node %3d: %3s ( ", i + 1, fossils[k]);
for (j = 0; j < npfossils[k]; j++) {
printf("%6.4f", stree.nodes[i].pfossil[j + (k == UPPER_F)]);
printf("%7.4f", stree.nodes[i].pfossil[j + (k == UPPER_F)]);
printf("%s", (j == npfossils[k] - 1 ? " )\n" : ", "));
}
}
......@@ -1038,7 +1038,7 @@ int GetOptions(char *ctlf)
{
int transform0 = ARCSIN_B; /* default transform: SQRT_B, LOG_B, ARCSIN_B */
int iopt, i, j, nopt = 31, lline = 4096;
char line[4096], *pline, *peq, opt[33], *comment = "*#";
char line[4096], *pline, opt[33], *comment = "*#";
char *optstr[] = { "seed", "seqfile","treefile", "outfile", "mcmcfile", "BayesFactorBeta",
"seqtype", "aaRatefile", "icode", "noisy", "usedata", "ndata", "duplication", "model", "clock",
"TipDate", "RootAge", "fossilerror", "alpha", "ncatG", "cleandata",
......@@ -1595,128 +1595,213 @@ double Infinitesites(FILE *fout)
int GetInitialsTimes(void)
{
/* This sets calibration node ages and root age first, and then populates other node ages, by
a break-stick strategy.
It ensures that each node age is younger than its parent's age, but does not check
the consistency of ages against the fossil constraints. The use of soft bounds means that
any ages are possible, even if the chain may start from a poor place.
In the case of shared/mirrored node ages due to gene duplication, it is not so clear whether
conflicts may still arise.
/* This sets ages for nodes with bounds first. Then it goes through a loop looking for nodes
with the largest min calibration, and another loop looking for the longest path, each
case populating node ages on the path until reaching an ancestral node with assigned age or
with max bound.
In the case of shared/mirrored node ages due to gene duplication, conflicts may still arise,
so that a loop of corrections is used.
stree.duplication:
stree.nodes[i].label = 0: usual node, which may and may not have calibration.
stree.nodes[j].label =-1: node i is the driver node, whose age is shared by other nodes.
It may and may not have calibration.
stree.nodes[j].label = i: node j shares the age of node i, and doesn't have calibration.
*/
int s = stree.nspecies, i, j, k, k1, ir, ncorrections, driver;
int maxdepth = 0, from, to, depth;
double *p, a, b, d, *r, agefrom, ageto;
int s = stree.nspecies, s21 = s * 2 - 1, root = stree.root;
int i, j, k, k1, ir, ncorrections, driver, from, to, depth, maxdepth;
double *p, a, b, d, tbpath[2], *tmin, *tmax, *r;
char *pptable, *flag;
int debug = 1;
if (com.TipDate && stree.nodes[stree.root].fossil != BOUND_F)
error2("\nTipDate model requires bounds on the root age..\n");
/* set up ages of calibration nodes */
for (i = s; i < s * 2 - 1; i++) stree.nodes[i].age = -1;
for (i = s; i < s*2-1; i++) {
/* set up pop-pop table of ancestor-descendent relationships. */
pptable = (char*)malloc(s21*(s21 + 1) * sizeof(char));
if (pptable == NULL) error2("oom pptable");
flag = pptable + s21*s21;
memset(pptable, 0, s21*(s21 + 1) * sizeof(char));
tmin = (double*)malloc(s21 * 2 * sizeof(double));
if (tmin == NULL) error2("oom tmin");
memset(tmin, 0, s21 * 2 * sizeof(double));
tmax = tmin + s21;
r = (double*)malloc(s * sizeof(double));
if (r == NULL) error2("oom r");
for (i = 0; i < s21; i++) pptable[i*s21 + i] = pptable[i*s21 + root] = (char)1;
for (i = 0; i < s21; i++) {
for (j = i; j != root; j = stree.nodes[j].father)
pptable[i*s21 + j] = (char)1;
}
for (i = s; i < s21; i++) {
if (stree.nodes[i].label != -1) continue; /* -1 means driver */
for (j = s; j < s21; j++) {
if (stree.nodes[j].label == i) /* if j mirrors i, ancestors of j are also ancestors of i. */
for (k = j; k != root && pptable[i*s21 + k] == 0; k = stree.nodes[k].father) {
pptable[i*s21 + k] = (char)1;
}
}
/* if j mirrors i, copy ancestors of i to j */
for (j = s; j < s21; j++) {
if (stree.nodes[j].label == i)
memcpy(pptable + j*s21 + s, pptable + i*s21 + s, (s - 1) * sizeof(char));
}
}
if (debug && s<20) {
printf("\n\nSetting up initial node ages...\npop-pop table:\n ");
for (j = 0; j < s21; j++) printf(" %2d", j);
for (i = 0; i < s21; i++) {
printf("\n%2d: ", i);
for (j = 0; j < s21; j++) printf(" %2d", pptable[i*s21 + j]);
if (i >= s) {
printf(" %3s ", (stree.nodes[i].fossil ? fossils[stree.nodes[i].fossil] : ""));
printf(" label = %2d, ", stree.nodes[i].label);
if (stree.nodes[i].label == -1) printf("driver");
else if (stree.nodes[i].label > 0) printf("mirror");
}
}
}
/* retrieve tmin & rmax from calibrations */
for (i = s; i < s21; i++) stree.nodes[i].age = -1;
for (i = s; i < s21; i++) {
if (stree.nodes[i].fossil == 0) continue;
p = stree.nodes[i].pfossil;
switch (stree.nodes[i].fossil) {
case (LOWER_F): stree.nodes[i].age = p[0] * (1.05 + 0.1*rndu()); break;
case (UPPER_F): stree.nodes[i].age = p[1] * (0.6 + 0.4*rndu()); break;
case (BOUND_F): stree.nodes[i].age = p[0] + (p[1] - p[0])*(0.2 + rndu()*1.6); break;
case (GAMMA_F): stree.nodes[i].age = p[0] / p[1] * (0.7 + rndu()*0.6); break;
case (SKEWN_F): case(SKEWT_F):
d = p[2] / sqrt(1 + p[2] * p[2]);
a = p[0] + p[1] * d*sqrt(2 / Pi);
stree.nodes[i].age = a * (0.6 + 0.4*rndu());
case (LOWER_F): tmin[i] = p[0]; break;
case (UPPER_F): tmax[i] = p[1]; break;
case (BOUND_F): tmin[i] = p[0]; tmax[i] = p[1]; break;
case (GAMMA_F):
tmin[i] = QuantileGamma(0.025, p[0], p[1]); tmax[i] = QuantileGamma(0.975, p[0], p[1]);
break;
case (SKEWN_F):
case (SKEWT_F):
case (S2N_F):
d = p[3] / sqrt(1 + p[3] * p[3]);
a = (p[1] + p[2] * d*sqrt(2 / Pi)); /* mean of SN 1 */
d = p[6] / sqrt(1 + p[6] * p[6]);
b = (p[4] + p[5] * d*sqrt(2 / Pi)); /* mean of SN 2 */
stree.nodes[i].age = (p[0] * a + b) * (0.6 + 0.4*rndu());
break;
error2("implement SkewN, Skewt, S2N now");
}
}
for (i = s; i < s21; i++) {
for (j = s; j < s21; j++)
if (j != i && pptable[i*s21 + j] && tmax[j] > 0 && tmin[i] > tmax[j]) {
printf("\n\ndaughter node %3d minimun age %9.4f \nmother node %3d max age %9.4f\n", i + 1, tmin[i], j + 1, tmax[j]);
error2("strange calibration?");
}
}
for (i = s; i < s21; i++) {
if (tmin[i]) /* remove duplicated min bounds for ancestors of i */
for (j = s; j < s21; j++)
if (j != i && pptable[i*s21 + j] && tmin[j] <= tmin[i])
tmin[j] = 0;
if (tmax[i]) /* remove duplicated max bounds for descendents of i */
for (j = s; j < s21; j++) {
if (j != i && pptable[j*s21 + i] && tmax[j] >= tmax[i])
tmax[j] = 0;
}
}
if (debug) {
printf("\n\nbounds on nodes after removing duplicated min and max bounds");
for (i = s; i < s21; i++) {
if(tmin[i] || tmax[i])
printf("\n%2d: (%8.4f, %8.4f)", i+1, tmin[i], tmax[i]);
}
}
/* generate ages for bounded nodes if possible. */
for (i = s; i < s21; i++) {
if (tmin[i] && tmax[i])
stree.nodes[i].age = tmin[i] + (tmax[i] - tmin[i])*rndu();
}
if (tmax[root] == 0) error2("we need a maximum-age bound on root??");
/* copy nodes[i].age to all mirrored nodes */
driver = (stree.nodes[i].label >= s ? stree.nodes[i].label : (stree.nodes[i].label == -1 ? i : 0));
if (driver) {
for (k = s; k < s * 2 - 1; k++) {
if (k!=i && (k==driver || stree.nodes[k].label == driver))
stree.nodes[k].age = stree.nodes[i].age;
/* look for largest unused tmin, and assign ages on path from node to ancestor with age or with tmax. */
for (ir = 0; ir < s; ir++) {
tbpath[0] = 0;
for (i = s; i < s21; i++) { /* find max tmin and store in tbpath[0]. */
if (stree.nodes[i].age == -1 && tbpath[0] < tmin[i]) {
tbpath[0] = tmin[i]; from = i;
}
}
if (tbpath[0] == 0) break;
maxdepth = 1;
for (j = stree.nodes[from].father; j != -1; j = stree.nodes[j].father, maxdepth++) {
if (stree.nodes[j].age > 0)
break;
else if (tmax[j] > 0) {
maxdepth++;
break;
}
}
to = j;
tbpath[1] = (stree.nodes[to].age > 0 ? stree.nodes[to].age : tmax[to]);
/* check for consistency among calibration and mirrored nodes, and correct if needed. */
for (ir = 0; ir < 100; ir++) {
ncorrections = 0;
for (i = 0; i < s * 2 - 1; i++) {
if (stree.nodes[i].age == -1) continue;
/* j is calibration node ancestral to i. */
for (j = stree.nodes[i].father; j != -1; j = stree.nodes[j].father)
if (stree.nodes[j].age > 0) break;
if (j != -1 && stree.nodes[j].age < stree.nodes[i].age) {
stree.nodes[j].age = stree.nodes[i].age * (1.001 + 0.01*rndu());
ncorrections++;
/* copy nodes[j].age to all mirrored nodes */
for (j = 0; j < maxdepth; j++) r[j] = rndu();
if (maxdepth>1) qsort(r, (size_t)maxdepth, sizeof(double), comparedouble);
for (j = from, k = 0; k<maxdepth; j = stree.nodes[j].father, k++) {
stree.nodes[j].age = tbpath[0] + (tbpath[1] - tbpath[0])*r[k];
driver = (stree.nodes[j].label >= s ? stree.nodes[j].label : (stree.nodes[j].label == -1 ? j : 0));
if (driver) {
for (k = s; k < s * 2 - 1; k++) {
if (k != j && (k == driver || stree.nodes[k].label == driver))
stree.nodes[k].age = stree.nodes[j].age;
}
for (k1 = s; k1 < s21; k1++) {
if (k1 != j && (k1 == driver || stree.nodes[k1].label == driver))
stree.nodes[k1].age = stree.nodes[j].age;
}
}
}
if (ncorrections == 0) break;
printf("\n*** min-age round %2d: from nodes %3d (%9.5f) to %3d, %3d nodes ", ir+1, from+1, stree.nodes[from].age, to+1, maxdepth);
if(debug && s<200) printStree();
}
if (ir == 100)
puts("i did 100 rounds, but we need more!");
/* look for the deepest path from a node to its ancestor, and assign ages to all nodes inbetween. */
r = (double*)malloc(s*sizeof(double));
if (r == NULL) error2("oom r");
for (ir = 0; ir < s * 2 - 1; ir++) {
for (i = 0, maxdepth = 0; i < s * 2 - 1; i++) {
if (stree.nodes[i].age == -1) continue;
depth = 0;
for (j = stree.nodes[i].father; j != -1; j = stree.nodes[j].father) {
if (stree.nodes[j].age > 0) break;
/* look for longest path, and assign ages on path from node to ancestor with age or with tmax. */
for (ir = 0; ir < s; ir++) {
maxdepth = 0;
for (i = s; i < s21; i++) {
if (stree.nodes[i].age > 0) continue;
for (j = stree.nodes[i].father, depth = 1; j != -1; j = stree.nodes[j].father, depth++) {
if (stree.nodes[j].age > 0)
break;
else if (tmax[j] > 0) {
depth++;
break;
}
}
if (maxdepth < depth) {
maxdepth = depth; from = i; to = j;
}
}
if (maxdepth == 0) break; /* all ages are assigned. */
int *sons = stree.nodes[from].sons;
tbpath[0] = max2(stree.nodes[sons[0]].age, stree.nodes[sons[1]].age);
tbpath[1] = (stree.nodes[to].age > 0 ? stree.nodes[to].age : tmax[to]);
for (j = 0; j < maxdepth; j++) r[j] = rndu();
qsort(r, (size_t)maxdepth, sizeof(double), comparedouble);
agefrom = stree.nodes[from].age;
ageto = stree.nodes[to].age;
for (j = stree.nodes[from].father, k = 0; j != to; j = stree.nodes[j].father, k++) {
stree.nodes[j].age = agefrom + (ageto - agefrom)*r[k];
if(maxdepth>1) qsort(r, (size_t)maxdepth, sizeof(double), comparedouble);
for (j = from, k = 0; k<maxdepth; j = stree.nodes[j].father, k++) {
stree.nodes[j].age = tbpath[0] + (tbpath[1] - tbpath[0])*r[k];
driver = (stree.nodes[j].label >= s ? stree.nodes[j].label : (stree.nodes[j].label == -1 ? j : 0));
if (driver) {
for (k1 = s; k1 < s * 2 - 1; k1++) {
for (k1 = s; k1 < s21; k1++) {
if (k1 != j && (k1 == driver || stree.nodes[k1].label == driver))
stree.nodes[k1].age = stree.nodes[j].age;
}
}
}
printf("\n*** longest-path round %2d: from nodes %3d (%9.5f) to %3d, %3d nodes ", ir + 1, from + 1, stree.nodes[from].age, to + 1, maxdepth);
if (debug && s<200) printStree();
}
/* check for consistency for all nodes, and correct if needed. */
for (ir = 0; ir < 100; ir++) {
ncorrections = 0;
for (i = 0; i < s * 2 - 1; i++) {
for (i = 0; i < s21; i++) {
j = stree.nodes[i].father;
if (j == -1 || stree.nodes[i].age < stree.nodes[j].age) continue;
printf("\n*** correction round %2d nodes %2d & %2d, ages %9.5f %9.5f", ir+1, i+1, j+1, stree.nodes[i].age, stree.nodes[j].age);
ncorrections++;
stree.nodes[j].age = stree.nodes[i].age * (1.001 + 0.01*rndu());
/* copy nodes[j].age to all mirrored nodes */
driver = (stree.nodes[j].label >= s ? stree.nodes[j].label : (stree.nodes[j].label == -1 ? j : 0));
if (driver) {
for (k = s; k < s * 2 - 1; k++) {
for (k = s; k < s21; k++) {
if (k != j && (k == driver || stree.nodes[k].label == driver))
stree.nodes[k].age = stree.nodes[j].age;
}
......@@ -1724,10 +1809,8 @@ int GetInitialsTimes(void)
}
if (ncorrections == 0) break;
}
if (ir == 100)
puts("i did 100 rounds, but we need more!");
free(r);
free(pptable); free(tmin); free(r);
return (0);
}
......@@ -3808,14 +3891,14 @@ int DescriptiveStatisticsSimpleMCMCTREE(FILE *fout, char infile[], int nbin)
}
}
printf("\n\nPosterior mean (95%% Equal-tail CI) (95%% HPD CI) HPD-CI-width\n\n");
printf("\n\nPosterior means (95%% Equal-tail CI) (95%% HPD CI) HPD-CI-width\n\n");
for (j = SkipC1; j < p; j++) {
printf("%-10s ", varstr[j]);
printf("%10.4f (%6.4f, %6.4f) (%6.4f, %6.4f) %6.4f", mean[j], x025[j], x975[j], xHPD025[j], xHPD975[j], xHPD975[j] - xHPD025[j]);
printf("%10.4f (%7.4f, %7.4f) (%7.4f, %7.4f) %7.4f", mean[j], x025[j], x975[j], xHPD025[j], xHPD975[j], xHPD975[j] - xHPD025[j]);
if (j < SkipC1 + stree.nspecies - 1) {
printf(" (Jnode %2d)", 2 * stree.nspecies - 1 - 1 - j + SkipC1);
if (com.TipDate)
printf(" time: %6.2f (%5.2f, %5.2f)", com.TipDate - mean[j] * com.TipDate_TimeUnit,
printf(" time: %7.3f (%6.3f, %6.3f)", com.TipDate - mean[j] * com.TipDate_TimeUnit,
com.TipDate - x975[j] * com.TipDate_TimeUnit, com.TipDate - x025[j] * com.TipDate_TimeUnit);
}
printf("\n");
......@@ -3827,11 +3910,11 @@ int DescriptiveStatisticsSimpleMCMCTREE(FILE *fout, char infile[], int nbin)
fprintf(fout, "\n\nPosterior mean (95%% Equal-tail CI) (95%% HPD CI) HPD-CI-width\n\n");
for (j = SkipC1; j < p; j++) {
fprintf(fout, "%-10s ", varstr[j]);
fprintf(fout, "%10.4f (%6.4f, %6.4f) (%6.4f, %6.4f) %6.4f", mean[j], x025[j], x975[j], xHPD025[j], xHPD975[j], xHPD975[j] - xHPD025[j]);
fprintf(fout, "%10.4f (%7.4f, %7.4f) (%7.4f, %7.4f) %7.4f", mean[j], x025[j], x975[j], xHPD025[j], xHPD975[j], xHPD975[j] - xHPD025[j]);
if (j < SkipC1 + stree.nspecies - 1) {
fprintf(fout, " (Jnode %2d)", 2 * stree.nspecies - 1 - 1 - j + SkipC1);
if (com.TipDate)
fprintf(fout, " time: %6.2f (%5.2f, %5.2f)", com.TipDate - mean[j] * com.TipDate_TimeUnit,
fprintf(fout, " time: %7.3f (%6.3f, %6.3f)", com.TipDate - mean[j] * com.TipDate_TimeUnit,
com.TipDate - x975[j] * com.TipDate_TimeUnit, com.TipDate - x025[j] * com.TipDate_TimeUnit);
}
fprintf(fout, "\n");
......@@ -3877,9 +3960,10 @@ int MCMC(FILE* fout)
for (j = 0; j < mcmc.nsteplength; j++)
mcmc.steplength[j] = 0.001 + 0.1*rndu();
/*
printf("\nsteplength values: ");
matout2(F0, mcmc.steplength, 1, mcmc.nsteplength, 9, 5);
*/
printStree();
x = (double*)malloc(com.np * 2 * sizeof(double));
......
......@@ -19,6 +19,14 @@
#define FOR(i,n) for(i=0; i<n; i++)
#define FPN(file) fputc('\n', file)
#define F0 stdout
#if !defined(MAX)
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif
#if !defined(MIN)
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif
#define SQUARE(a) ((a)*(a))
#define min2(a,b) ((a)<(b)?(a):(b))
#define max2(a,b) ((a)>(b)?(a):(b))
#define swap2(a,b,y) { y=a; a=b; b=y; }
......@@ -386,6 +394,6 @@ enum {PrBranch=1, PrNodeNum=2, PrLabel=4, PrNodeStr=8, PrAge=16, PrOmega=32} Out
#define PAML_RELEASE 0
#define pamlVerStr "paml version 4.9i, September 2018"
#define pamlVerStr "paml version 4.9j, October 2019"
#endif
/*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri <Tomas.Flouri@h-its.org>,
Exelixis Lab, Heidelberg Instutute for Theoretical Studies
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#if defined(_MSC_VER)
static __declspec(align(32)) double lmatrix[4*4];
static __declspec(align(32)) double rmatrix[4*4];
#else
static double lmatrix[4*4] __attribute__ ((aligned (32)));
static double rmatrix[4*4] __attribute__ ((aligned (32)));
#endif
void pmat_jc69(double * pmatrix, double t)
{
if (t < -0.0001)
printf ("\nt = %.5f in pijJC69", t);
if (t < 1e-100)
{
pmatrix[0] = 1;
pmatrix[1] = 0;
pmatrix[2] = 0;
pmatrix[3] = 0;
pmatrix[4] = 0;
pmatrix[5] = 1;
pmatrix[6] = 0;
pmatrix[7] = 0;
pmatrix[8] = 0;
pmatrix[9] = 0;
pmatrix[10] = 1;
pmatrix[11] = 0;
pmatrix[12] = 0;
pmatrix[13] = 0;
pmatrix[14] = 0;
pmatrix[15] = 1;
}
else
{
double a = (1 + 3*exp(-4*t/3) ) / 4;
double b = (1 - a) / 3;
pmatrix[0] = a;
pmatrix[1] = b;
pmatrix[2] = b;
pmatrix[3] = b;
pmatrix[4] = b;
pmatrix[5] = a;
pmatrix[6] = b;
pmatrix[7] = b;
pmatrix[8] = b;
pmatrix[9] = b;
pmatrix[10] = a;
pmatrix[11] = b;
pmatrix[12] = b;
pmatrix[13] = b;
pmatrix[14] = b;
pmatrix[15] = a;
}
}
static void tip_tip(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int n,i;
__m256d ymm0,ymm1,ymm2;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
for (n = 0; n < sites; ++n)
{
double * lmat = lmatrix + com.z[lchild][n] * states;
double * rmat = rmatrix + com.z[rchild][n] * states;
// for (i = 0; i < states; ++i)
// clv[i] = lmat[i] * rmat[i];
ymm0 = _mm256_load_pd(lmat);
ymm1 = _mm256_load_pd(rmat);
ymm2 = _mm256_mul_pd(ymm0,ymm1);
_mm256_store_pd(clv,ymm2);
clv += states;
}
}
static void tip_tip_amb(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int n,k;
double lvec[4];
double rvec[4];
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
__m256d xmm0,xmm1,ymm0,ymm1;
for (n = 0; n < sites; ++n)
{
double * lmat = lmatrix + (CharaMap[com.z[lchild][n]][0] << 2); // *states;
double * rmat = rmatrix + (CharaMap[com.z[rchild][n]][0] << 2); // *states;
xmm0 = _mm256_load_pd(lmat);
for (k = 1; k < nChara[com.z[lchild][n]]; ++k)
{
lmat = lmatrix + (CharaMap[com.z[lchild][n]][k] << 2); // *states;
xmm1 = _mm256_load_pd(lmat);
xmm0 = _mm256_add_pd(xmm0,xmm1);
}
ymm0 = _mm256_load_pd(rmat);
for (k = 1; k < nChara[com.z[rchild][n]]; ++k)
{
rmat = rmatrix + (CharaMap[com.z[rchild][n]][k] << 2); // *states;
ymm1 = _mm256_load_pd(rmat);
ymm0 = _mm256_add_pd(ymm0,ymm1);
}
xmm1 = _mm256_mul_pd(xmm0,ymm0);
_mm256_store_pd(clv,xmm1);
clv += states;
}
}
static void tip_inner(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int h;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
double * rclv = nodes[rchild].conP;
__m256d ymm0,ymm1,ymm2,ymm3,ymm4,ymm5,ymm6,ymm7;
__m256d xmm0,xmm4;
for (h = 0; h < sites; ++h)
{
double * lmat = lmatrix + com.z[lchild][h] * states;
double * rmat = rmatrix;
xmm4 = _mm256_load_pd(lmat);
ymm4 = _mm256_load_pd(rmat);
ymm5 = _mm256_load_pd(rclv);
ymm0 = _mm256_mul_pd(ymm4,ymm5);
rmat += states;
ymm4 = _mm256_load_pd(rmat);
ymm1 = _mm256_mul_pd(ymm4,ymm5);
rmat += states;
ymm4 = _mm256_load_pd(rmat);
ymm2 = _mm256_mul_pd(ymm4,ymm5);
rmat += states;
ymm4 = _mm256_load_pd(rmat);
ymm3 = _mm256_mul_pd(ymm4,ymm5);
/* compute y */
ymm4 = _mm256_unpackhi_pd(ymm0,ymm1);
ymm5 = _mm256_unpacklo_pd(ymm0,ymm1);
ymm6 = _mm256_unpackhi_pd(ymm2,ymm3);
ymm7 = _mm256_unpacklo_pd(ymm2,ymm3);
ymm0 = _mm256_add_pd(ymm4,ymm5);
ymm1 = _mm256_add_pd(ymm6,ymm7);
ymm2 = _mm256_permute2f128_pd(ymm0,ymm1, _MM_SHUFFLE(0,2,0,1));
ymm3 = _mm256_blend_pd(ymm0,ymm1,12);
ymm4 = _mm256_add_pd(ymm2,ymm3);
/* compute x*y */
xmm0 = _mm256_mul_pd(xmm4,ymm4);
_mm256_store_pd(clv, xmm0);
clv += states;
rclv += states;
}
}
static void tip_inner_amb(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int h,j,k;
double y;
double x;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
__m256d ymm0,ymm1,ymm2,ymm3,ymm4,ymm5,ymm6,ymm7;
__m256d xmm0,xmm1;
double * rclv = nodes[rchild].conP;
for (h = 0; h < sites; ++h)
{
double * lmat = lmatrix + (CharaMap[com.z[lchild][h]][0] << 2); // *states;
double * rmat = rmatrix;
/* compute term for left child (tip) */
xmm0 = _mm256_load_pd(lmat);
for (k = 1; k < nChara[com.z[lchild][h]]; ++k)
{
lmat = lmatrix + (CharaMap[com.z[lchild][h]][k] << 2); // *states;
xmm1 = _mm256_load_pd(lmat);
xmm0 = _mm256_add_pd(xmm0,xmm1);
}
/* compute term for right child (inner) */
ymm4 = _mm256_load_pd(rmat);
ymm5 = _mm256_load_pd(rclv);
ymm0 = _mm256_mul_pd(ymm4,ymm5);
rmat += states;
ymm4 = _mm256_load_pd(rmat);
ymm1 = _mm256_mul_pd(ymm4,ymm5);
rmat += states;
ymm4 = _mm256_load_pd(rmat);
ymm2 = _mm256_mul_pd(ymm4,ymm5);
rmat += states;
ymm4 = _mm256_load_pd(rmat);
ymm3 = _mm256_mul_pd(ymm4,ymm5);
ymm4 = _mm256_unpackhi_pd(ymm0,ymm1);
ymm5 = _mm256_unpacklo_pd(ymm0,ymm1);
ymm6 = _mm256_unpackhi_pd(ymm2,ymm3);
ymm7 = _mm256_unpacklo_pd(ymm2,ymm3);
ymm0 = _mm256_add_pd(ymm4,ymm5);
ymm1 = _mm256_add_pd(ymm6,ymm7);
ymm2 = _mm256_permute2f128_pd(ymm0,ymm1, _MM_SHUFFLE(0,2,0,1));
ymm3 = _mm256_blend_pd(ymm0,ymm1,12);
ymm4 = _mm256_add_pd(ymm2,ymm3);
ymm0 = _mm256_mul_pd(xmm0,ymm4);
_mm256_store_pd(clv,ymm0);
clv += states;
rclv += states;
}
}
static void inner_inner(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int h;
__m256d ymm0,ymm1,ymm2,ymm3,ymm4,ymm5,ymm6,ymm7;
__m256d xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
double * lclv = nodes[lchild].conP;
double * rclv = nodes[rchild].conP;
for (h = 0; h < sites; ++h)
{
double * lmat = lmatrix;
double * rmat = rmatrix;
/* compute vector of x */
xmm4 = _mm256_load_pd(lmat);
xmm5 = _mm256_load_pd(lclv);
xmm0 = _mm256_mul_pd(xmm4,xmm5);
ymm4 = _mm256_load_pd(rmat);
ymm5 = _mm256_load_pd(rclv);
ymm0 = _mm256_mul_pd(ymm4,ymm5);
lmat += states;
rmat += states;
xmm4 = _mm256_load_pd(lmat);
xmm1 = _mm256_mul_pd(xmm4,xmm5);
ymm4 = _mm256_load_pd(rmat);
ymm1 = _mm256_mul_pd(ymm4,ymm5);
lmat += states;
rmat += states;
xmm4 = _mm256_load_pd(lmat);
xmm2 = _mm256_mul_pd(xmm4,xmm5);
ymm4 = _mm256_load_pd(rmat);
ymm2 = _mm256_mul_pd(ymm4,ymm5);
lmat += states;
rmat += states;
xmm4 = _mm256_load_pd(lmat);
xmm3 = _mm256_mul_pd(xmm4,xmm5);
ymm4 = _mm256_load_pd(rmat);
ymm3 = _mm256_mul_pd(ymm4,ymm5);
/* compute x */
xmm4 = _mm256_unpackhi_pd(xmm0,xmm1);
xmm5 = _mm256_unpacklo_pd(xmm0,xmm1);
xmm6 = _mm256_unpackhi_pd(xmm2,xmm3);
xmm7 = _mm256_unpacklo_pd(xmm2,xmm3);
xmm0 = _mm256_add_pd(xmm4,xmm5);
xmm1 = _mm256_add_pd(xmm6,xmm7);
xmm2 = _mm256_permute2f128_pd(xmm0,xmm1, _MM_SHUFFLE(0,2,0,1));
xmm3 = _mm256_blend_pd(xmm0,xmm1,12);
xmm4 = _mm256_add_pd(xmm2,xmm3);
/* compute y */
ymm4 = _mm256_unpackhi_pd(ymm0,ymm1);
ymm5 = _mm256_unpacklo_pd(ymm0,ymm1);
ymm6 = _mm256_unpackhi_pd(ymm2,ymm3);
ymm7 = _mm256_unpacklo_pd(ymm2,ymm3);
ymm0 = _mm256_add_pd(ymm4,ymm5);
ymm1 = _mm256_add_pd(ymm6,ymm7);
ymm2 = _mm256_permute2f128_pd(ymm0,ymm1, _MM_SHUFFLE(0,2,0,1));
ymm3 = _mm256_blend_pd(ymm0,ymm1,12);
ymm4 = _mm256_add_pd(ymm2,ymm3);
/* compute x*y */
xmm0 = _mm256_mul_pd(xmm4,ymm4);
_mm256_store_pd(clv, xmm0);
clv += states;
lclv += states;
rclv += states;
}
}
int ConditonalPNode (int inode)
{
int states = 4;
int sites = com.npatt;
int i;
int child, lchild, rchild;
/* recursive call the ConditionalPNode on all children of the current node */
for (i = 0; i < nodes[inode].nson; ++i)
{
/* get id of i-th child of current node */
child = nodes[inode].sons[i];
/* if child is an inner node then recursively compute its conditional
probabilities vector */
if (nodes[child].nson > 0 && (!mcmc.saveconP || !com.oldconP[child]))
ConditonalPNode(nodes[inode].sons[i]);
}
/* initialize CLV entries of current node to 1 */
int n = sites * states;
for (i = 0; i < n; ++i)
nodes[inode].conP[i] = 1;
if (nodes[inode].nson == 0) return (0);
lchild = nodes[inode].sons[0];
rchild = nodes[inode].sons[1];
int ltip = (nodes[lchild].nson == 0);
int rtip = (nodes[rchild].nson == 0);
if (ltip && rtip)
{
if (com.cleandata)
tip_tip(lchild,rchild,nodes[inode].conP);
else
tip_tip_amb(lchild,rchild,nodes[inode].conP);
}
else if (ltip)
{
if (com.cleandata)
tip_inner(lchild,rchild,nodes[inode].conP);
else
tip_inner_amb(lchild,rchild,nodes[inode].conP);
}
else if (rtip)
{
if (com.cleandata)
tip_inner(rchild,lchild,nodes[inode].conP);
else
tip_inner_amb(rchild,lchild,nodes[inode].conP);
}
else
inner_inner(lchild,rchild,nodes[inode].conP);
return (0);
}
/*
Copyright (C) 2015 Tomas Flouri
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contact: Tomas Flouri <Tomas.Flouri@h-its.org>,
Exelixis Lab, Heidelberg Instutute for Theoretical Studies
Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
*/
#if defined(_MSC_VER)
static __declspec(align(16)) double lmatrix[4*4];
static __declspec(align(16)) double rmatrix[4*4];
#else
static double lmatrix[4*4] __attribute__ ((aligned (16)));
static double rmatrix[4*4] __attribute__ ((aligned (16)));
#endif
void pmat_jc69(double * pmatrix, double t)
{
if (t < -0.0001)
printf ("\nt = %.5f in pijJC69", t);
if (t < 1e-100)
{
pmatrix[0] = 1;
pmatrix[1] = 0;
pmatrix[2] = 0;
pmatrix[3] = 0;
pmatrix[4] = 0;
pmatrix[5] = 1;
pmatrix[6] = 0;
pmatrix[7] = 0;
pmatrix[8] = 0;
pmatrix[9] = 0;
pmatrix[10] = 1;
pmatrix[11] = 0;
pmatrix[12] = 0;
pmatrix[13] = 0;
pmatrix[14] = 0;
pmatrix[15] = 1;
}
else
{
double a = (1 + 3*exp(-4*t/3) ) / 4;
double b = (1 - a) / 3;
pmatrix[0] = a;
pmatrix[1] = b;
pmatrix[2] = b;
pmatrix[3] = b;
pmatrix[4] = b;
pmatrix[5] = a;
pmatrix[6] = b;
pmatrix[7] = b;
pmatrix[8] = b;
pmatrix[9] = b;
pmatrix[10] = a;
pmatrix[11] = b;
pmatrix[12] = b;
pmatrix[13] = b;
pmatrix[14] = b;
pmatrix[15] = a;
}
}
static void tip_tip(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int n,i;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
__m128d xmm0,xmm1,xmm2;
for (n = 0; n < sites; ++n)
{
double * lmat = lmatrix + (com.z[lchild][n] << 2); // *states;
double * rmat = rmatrix + (com.z[rchild][n] << 2); // *states;
/*
Vectorization of the following code snippet
(only applicable to JC69)
for (i = 0; i < states; ++i)
clv[i] = lmat[i] * rmat[i];
*/
xmm0 = _mm_load_pd(lmat);
xmm1 = _mm_load_pd(rmat);
xmm2 = _mm_mul_pd(xmm0,xmm1);
_mm_store_pd(clv,xmm2);
xmm0 = _mm_load_pd(lmat+2);
xmm1 = _mm_load_pd(rmat+2);
xmm2 = _mm_mul_pd(xmm0,xmm1);
_mm_store_pd(clv+2,xmm2);
clv += states;
}
}
static void tip_tip_amb(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int n,k;
double lvec[4];
double rvec[4];
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
__m128d xmm0,xmm1,xmm2,xmm3;
__m128d ymm0,ymm1,ymm2,ymm3;
for (n = 0; n < sites; ++n)
{
double * lmat = lmatrix + (CharaMap[com.z[lchild][n]][0] << 2); // *states;
double * rmat = rmatrix + (CharaMap[com.z[rchild][n]][0] << 2); // *states;
xmm0 = _mm_load_pd(lmat);
xmm1 = _mm_load_pd(lmat+2);
for (k = 1; k < nChara[com.z[lchild][n]]; ++k)
{
lmat = lmatrix + (CharaMap[com.z[lchild][n]][k] << 2); // *states;
xmm2 = _mm_load_pd(lmat);
xmm3 = _mm_load_pd(lmat+2);
xmm0 = _mm_add_pd(xmm0,xmm2);
xmm1 = _mm_add_pd(xmm1,xmm3);
}
ymm0 = _mm_load_pd(rmat);
ymm1 = _mm_load_pd(rmat+2);
for (k = 1; k < nChara[com.z[rchild][n]]; ++k)
{
rmat = rmatrix + (CharaMap[com.z[rchild][n]][k] << 2); // *states;
ymm2 = _mm_load_pd(rmat);
ymm3 = _mm_load_pd(rmat+2);
ymm0 = _mm_add_pd(ymm0,ymm2);
ymm1 = _mm_add_pd(ymm1,ymm3);
}
xmm2 = _mm_mul_pd(xmm0,ymm0);
xmm3 = _mm_mul_pd(xmm1,ymm1);
_mm_store_pd(clv,xmm2);
_mm_store_pd(clv+2,xmm3);
clv += states;
}
}
static void tip_inner(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int h,j,k;
double y;
__m128d xmm0,xmm1,xmm2,xmm3,xmm4;
__m128d xmm5,xmm6,xmm7,xmm8,xmm9;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
double * rclv = nodes[rchild].conP;
for (h = 0; h < sites; ++h)
{
double * lmat = lmatrix + (com.z[lchild][h] << 2); // *states;
double * rmat = rmatrix;
/*
Vectorization of the following code snippet
for (j = 0; j < states; ++j)
{
y = 0;
for (k = 0; k < states; ++k)
y += rmat[k] * rclv[k];
rmat += states;
clv[j] = y*lmat[j];
}
*/
xmm0 = _mm_load_pd(rclv); /* needed */
xmm2 = _mm_load_pd(rmat);
xmm3 = _mm_mul_pd(xmm0,xmm2);
xmm1 = _mm_load_pd(rclv+2); /* needed */
xmm2 = _mm_load_pd(rmat+2);
xmm4 = _mm_mul_pd(xmm1,xmm2);
/* calculate (b1*d1 + b2*d2 | b3*d3 + b4*d4) */
xmm2 = _mm_hadd_pd(xmm3,xmm4); /* needed (1) */
rmat += states;
xmm3 = _mm_load_pd(rmat);
xmm4 = _mm_mul_pd(xmm0,xmm3);
xmm5 = _mm_load_pd(rmat+2);
xmm6 = _mm_mul_pd(xmm1,xmm5);
/* calculate (b5*d1 + b6*d2 | b7*d3 + b8*d4) */
xmm3 = _mm_hadd_pd(xmm4,xmm6); /* needed (2) */
rmat += states;
xmm4 = _mm_load_pd(rmat);
xmm5 = _mm_mul_pd(xmm0,xmm4);
xmm6 = _mm_load_pd(rmat+2);
xmm7 = _mm_mul_pd(xmm1,xmm6);
/* calculate (b9*d1 + b10*d2 | b11*d3 + b12*d4) */
xmm4 = _mm_hadd_pd(xmm5,xmm7); /* needed (3) */
rmat += states;
xmm5 = _mm_load_pd(rmat);
xmm6 = _mm_mul_pd(xmm0,xmm5);
xmm7 = _mm_load_pd(rmat+2);
xmm8 = _mm_mul_pd(xmm1,xmm7);
/* calculate (b13*d1 + b14*d2 | b15*d3 + b16*d4) */
xmm5 = _mm_hadd_pd(xmm6,xmm8); /* needed (4) */
/* calculate (b1*d1 + b2*d2 + b3*d3 + b4*d3 |
b5*d1 + b6*d2 + b7*d3 + b8*d4 ) */
xmm0 = _mm_hadd_pd(xmm2,xmm3);
/* calculate (b9*d1 + b10*d2 + b11*d3 + b12*d3 |
b13*d1 + b14*d2 + b15*d3 + b16*d4 ) */
xmm1 = _mm_hadd_pd(xmm4,xmm5);
xmm2 = _mm_load_pd(lmat);
xmm3 = _mm_load_pd(lmat+2);
xmm4 = _mm_mul_pd(xmm0,xmm2);
xmm5 = _mm_mul_pd(xmm1,xmm3);
_mm_store_pd(clv,xmm4);
_mm_store_pd(clv+2,xmm5);
clv += states;
rclv += states;
}
}
static void tip_inner_amb(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int h,k;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
__m128d xmm0,xmm1,xmm2,xmm3;
__m128d ymm2,ymm3,ymm4,ymm5,ymm6,ymm7,ymm8,ymm9;
double * rclv = nodes[rchild].conP;
for (h = 0; h < sites; ++h)
{
double * lmat = lmatrix + (CharaMap[com.z[lchild][h]][0] << 2); // *states;
double * rmat = rmatrix;
/* compute term for left child (tip) */
xmm0 = _mm_load_pd(lmat);
xmm1 = _mm_load_pd(lmat+2);
for (k = 1; k < nChara[com.z[lchild][h]]; ++k)
{
lmat = lmatrix + (CharaMap[com.z[lchild][h]][k] << 2); // *states;
xmm2 = _mm_load_pd(lmat);
xmm3 = _mm_load_pd(lmat+2);
xmm0 = _mm_add_pd(xmm0,xmm2);
xmm1 = _mm_add_pd(xmm1,xmm3);
}
/* compute term for right child (inner) */
ymm3 = _mm_load_pd(rclv); /* needed */
ymm4 = _mm_load_pd(rmat);
ymm6 = _mm_mul_pd(ymm3,ymm4);
ymm4 = _mm_load_pd(rclv+2); /* needed */
ymm7 = _mm_load_pd(rmat+2);
ymm8 = _mm_mul_pd(ymm4,ymm7);
/* calculate (b1*d1 + b2*d2 | b3*d3 + b4*d4) */
ymm5 = _mm_hadd_pd(ymm6,ymm8); /* needed (2) */
rmat += states;
ymm7 = _mm_load_pd(rmat);
ymm8 = _mm_mul_pd(ymm3,ymm7);
ymm7 = _mm_load_pd(rmat+2);
ymm9 = _mm_mul_pd(ymm4,ymm7);
/* calculate (b5*d1 + b6*d2 | b7*d3 + b8*d4) */
ymm7 = _mm_hadd_pd(ymm8,ymm9); /* needed (4) */
ymm2 = _mm_hadd_pd(ymm5,ymm7);
ymm9 = _mm_mul_pd(xmm0,ymm2);
_mm_store_pd(clv, ymm9);
rmat += states;
ymm6 = _mm_load_pd(rmat);
ymm7 = _mm_mul_pd(ymm3,ymm6);
ymm6 = _mm_load_pd(rmat+2);
ymm8 = _mm_mul_pd(ymm4,ymm6);
/* calculate (b9*d1 + b10*d2 | b11*d3 + b12*d4) */
ymm5 = _mm_hadd_pd(ymm7,ymm8); /* needed (2) */
rmat += states;
ymm7 = _mm_load_pd(rmat);
ymm8 = _mm_mul_pd(ymm3,ymm7);
ymm7 = _mm_load_pd(rmat+2);
ymm9 = _mm_mul_pd(ymm4,ymm7);
/* calculate (b13*d1 + b14*d2 | b15*d3 + b16*d4) */
ymm7 = _mm_hadd_pd(ymm8,ymm9); /* needed (4) */
ymm2 = _mm_hadd_pd(ymm5,ymm7);
ymm9 = _mm_mul_pd(xmm1,ymm2);
_mm_store_pd(clv+2, ymm9);
clv += states;
rclv += states;
}
}
static void inner_inner(int lchild, int rchild, double * clv)
{
int sites = com.npatt;
int states = 4;
int h,j,k;
double x,y;
pmat_jc69(lmatrix, nodes[lchild].branch);
pmat_jc69(rmatrix, nodes[rchild].branch);
double * lclv = nodes[lchild].conP;
double * rclv = nodes[rchild].conP;
__m128d xmm0,xmm1,xmm2,xmm3,xmm4;
__m128d xmm5,xmm6,xmm7,xmm8,xmm9;
/*
+-----+-----+-----+-----+ +-----+-----+-----+-----+
| a1 | a2 | a3 | a4 | | b1 | b2 | b3 | b4 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+
| a5 | a6 | a7 | a8 | | b5 | b6 | b7 | b8 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+
| a9 | a10 | a11 | a12 | | b9 | b10 | b11 | b12 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+
| a13 | a14 | a15 | a16 | | b13 | b14 | b15 | b16 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+
x x
+----+----+----+----+ +----+----+----+----+
| c1 | c2 | c3 | c4 | | d1 | d2 | d3 | d4 |
+----+----+----+----+ +----+----+----+----+
*/
for (h = 0; h < sites; ++h)
{
double * lmat = lmatrix;
double * rmat = rmatrix;
/*
Vectorization of the following code snippet
for (j = 0; j < states; ++j)
{
x = 0;
y = 0;
for (k = 0; k < states; ++k)
{
x += lmat[k] * lclv[k];
y += rmat[k] * rclv[k];
}
lmat += states;
rmat += states;
clv[j] = x*y;
}
*/
/* compute left */
xmm0 = _mm_load_pd(lclv); /* needed */
xmm2 = _mm_load_pd(lmat);
xmm3 = _mm_mul_pd(xmm0,xmm2);
xmm1 = _mm_load_pd(lclv+2); /* needed */
xmm2 = _mm_load_pd(lmat+2);
xmm4 = _mm_mul_pd(xmm1,xmm2);
/* calculate (a1*c1 + a2*c2 | a3*c3 + a4*c4) */
xmm2 = _mm_hadd_pd(xmm3,xmm4); /* needed (1) */
/* compute right */
xmm3 = _mm_load_pd(rclv); /* needed */
xmm4 = _mm_load_pd(rmat);
xmm6 = _mm_mul_pd(xmm3,xmm4);
xmm4 = _mm_load_pd(rclv+2); /* needed */
xmm7 = _mm_load_pd(rmat+2);
xmm8 = _mm_mul_pd(xmm4,xmm7);
/* calculate (b1*d1 + b2*d2 | b3*d3 + b4*d4) */
xmm5 = _mm_hadd_pd(xmm6,xmm8); /* needed (2) */
rmat += states;
lmat += states;
/* compute left */
xmm6 = _mm_load_pd(lmat);
xmm7 = _mm_mul_pd(xmm0,xmm6);
xmm6 = _mm_load_pd(lmat+2);
xmm8 = _mm_mul_pd(xmm1,xmm6);
/* calculate (a5*c1 + a6*c2 | a7*c3 + a8*c4) */
xmm6 = _mm_hadd_pd(xmm7,xmm8); /* needed (3) */
/* compute right */
xmm7 = _mm_load_pd(rmat);
xmm8 = _mm_mul_pd(xmm3,xmm7);
xmm7 = _mm_load_pd(rmat+2);
xmm9 = _mm_mul_pd(xmm4,xmm7);
/* calculate (b5*d1 + b6*d2 | b7*d3 + b8*d4) */
xmm7 = _mm_hadd_pd(xmm8,xmm9); /* needed (4) */
xmm8 = _mm_hadd_pd(xmm2,xmm6);
xmm2 = _mm_hadd_pd(xmm5,xmm7);
xmm9 = _mm_mul_pd(xmm8,xmm2);
_mm_store_pd(clv, xmm9);
rmat += states;
lmat += states;
/* compute left */
xmm5 = _mm_load_pd(lmat);
xmm6 = _mm_mul_pd(xmm0,xmm5);
xmm5 = _mm_load_pd(lmat+2);
xmm7 = _mm_mul_pd(xmm1,xmm5);
/* calculate (a9*c1 + a10*c2 | a11*c3 + a12*c4) */
xmm2 = _mm_hadd_pd(xmm6,xmm7); /* needed (1) */
/* compute right */
xmm6 = _mm_load_pd(rmat);
xmm7 = _mm_mul_pd(xmm3,xmm6);
xmm6 = _mm_load_pd(rmat+2);
xmm8 = _mm_mul_pd(xmm4,xmm6);
/* calculate (b9*d1 + b10*d2 | b11*d3 + b12*d4) */
xmm5 = _mm_hadd_pd(xmm7,xmm8); /* needed (2) */
rmat += states;
lmat += states;
/* compute left */
xmm6 = _mm_load_pd(lmat);
xmm7 = _mm_mul_pd(xmm0,xmm6);
xmm6 = _mm_load_pd(lmat+2);
xmm8 = _mm_mul_pd(xmm1,xmm6);
/* calculate (a13*c1 + a14*c2 | a15*c3 + a16*c4) */
xmm6 = _mm_hadd_pd(xmm7,xmm8); /* needed (3) */
/* compute right */
xmm7 = _mm_load_pd(rmat);
xmm8 = _mm_mul_pd(xmm3,xmm7);
xmm7 = _mm_load_pd(rmat+2);
xmm9 = _mm_mul_pd(xmm4,xmm7);
/* calculate (b13*d1 + b14*d2 | b15*d3 + b16*d4) */
xmm7 = _mm_hadd_pd(xmm8,xmm9); /* needed (4) */
xmm8 = _mm_hadd_pd(xmm2,xmm6);
xmm2 = _mm_hadd_pd(xmm5,xmm7);
xmm9 = _mm_mul_pd(xmm8,xmm2);
_mm_store_pd(clv+2, xmm9);
clv += states;
lclv += states;
rclv += states;
}
}
int ConditonalPNode (int inode)
{
int states = 4;
int sites = com.npatt;
int i;
int child, lchild, rchild;
/* recursive call the ConditionalPNode on all children of the current node */
for (i = 0; i < nodes[inode].nson; ++i)
{
/* get id of i-th child of current node */
child = nodes[inode].sons[i];
/* if child is an inner node then recursively compute its conditional
probabilities vector */
if (nodes[child].nson > 0 && (!mcmc.saveconP || !com.oldconP[child]))
ConditonalPNode(nodes[inode].sons[i]);
}
/* initialize CLV entries of current node to 1 */
int n = sites * states;
for (i = 0; i < n; ++i)
nodes[inode].conP[i] = 1;
if (nodes[inode].nson == 0) return (0);
lchild = nodes[inode].sons[0];
rchild = nodes[inode].sons[1];
int ltip = (nodes[lchild].nson == 0);
int rtip = (nodes[rchild].nson == 0);
if (ltip && rtip)
{
if (com.cleandata)
tip_tip(lchild,rchild,nodes[inode].conP);
else
{
//assert(0);
tip_tip_amb(lchild,rchild,nodes[inode].conP);
}
}
else if (ltip)
{
if (com.cleandata)
tip_inner(lchild,rchild,nodes[inode].conP);
else
{
tip_inner_amb(lchild,rchild,nodes[inode].conP);
//assert(0);
}
}
else if (rtip)
{
if (com.cleandata)
tip_inner(rchild,lchild,nodes[inode].conP);
else
{
//assert(0);
tip_inner_amb(rchild,lchild,nodes[inode].conP);
}
}
else
inner_inner(lchild,rchild,nodes[inode].conP);
return (0);
}
......@@ -1756,7 +1756,8 @@ double rndLaplace(void) {
double rndt2(void)
{
/* Standard Student's t_2 variate, with d.f. = 2. t2 has mean 0 and variance infinity. */
/* Standard Student's t_2 variate, with d.f. = 2. t2 has mean 0 and variance infinity.
*/
double u, t2;
u = 2 * rndu() - 1;
......@@ -1772,7 +1773,7 @@ double rndt4(void)
This has variance 1, and is the standard t4 variate divided by sqrt(2).
The standard t4 variate has variance 2.
*/
double u, v, w, c2, r2, t4, sqrt2 = 0.707106781;
double u, v, w, c2, r2, t4, sqrt2 = 0.7071067811865475244;
for ( ; ; ) {
u = 2 * rndu() - 1;
......@@ -4525,7 +4526,7 @@ double Binomial(double n, int k, double *scale)
/* calculates (n choose k), where n is any real number, and k is integer.
If(*scale!=0) the result should be c+exp(*scale).
*/
double c = 1, i, large = 1e99;
double c = 1, i, large = 1e200;
*scale = 0;
if ((int)k != k)
......@@ -5676,12 +5677,12 @@ int DescriptiveStatistics(FILE *fout, char infile[], int nbin, int propternary,
for (j = 0; j < p; j++)
fscanf(fin, "%lf", &data[j*n + i]);
fclose(fin);
/*
if(p>1) {
printf("\nGreat offer! I can smooth a few 2-D densities for free. How many do you want? ");
scanf("%d", &nf2d);
}
*/
if (nf2d > MAXNF2D) error2("I don't want to do that many!");
for (i = 0; i < nf2d; i++) {
printf("pair #%d (e.g., type 1 3 to use variables #1 and #3)? ", i + 1);
......@@ -5735,6 +5736,8 @@ int DescriptiveStatistics(FILE *fout, char infile[], int nbin, int propternary,
fprintf(fout, "\n\n");
fflush(fout);
return(0);
fprintf(fout, "\nCorrelation matrix");
for (j = SkipColumns; j < p; j++) {
fprintf(fout, "\n%-8s ", varstr[j]);
......
......@@ -936,7 +936,7 @@ int ReadSeq(FILE *fout, FILE *fseq, int cleandata, int locus)
/***** GIBBON, 2017.10.8 **/
//return(0);
/* return(0); */
if (!com.readpattern) {
PatternWeight();
......@@ -3140,8 +3140,10 @@ int ReadTreeN(FILE *ftree, int *haslength, int copyname, int popline)
if (nodes[inode].annotation == NULL) error2("oom annotation");
strcpy(nodes[inode].annotation, line);
/* This line is used in MCcoal. ProcessNodeAnnotation is used to process node annotation elsewhere. */
if (line[0] == '#')
if (line[0] == '#') /* branch labels used in baseml and codeml */
sscanf(line + 1, "%lf", &nodes[inode].label);
else if (line[0] == '$') /* clade labels used to label branches in baseml and codeml */
sscanf(line + 1, "%lf", &nodes[inode].label2);
}
} /* for( ; ; ) */
......@@ -3195,9 +3197,15 @@ int OutSubTreeN(FILE *fout, int inode, int spnames, int printopt)
}
if ((printopt & PrNodeNum) && nodes[inode].nson)
fprintf(fout, " %d ", inode + 1);
#if (defined BPP)
if ((printopt & PrNodeStr) && inode < com.ns && nodes[inode].label > 0)
fprintf(fout, " [&theta=%.6f]", nodes[inode].label);
#else
if ((printopt & PrLabel) && nodes[inode].label > 0)
fprintf(fout, " #%.6f", nodes[inode].label);
/* fprintf(fout, " [&label=%.6f]", nodes[inode].label); */
#endif
if ((printopt & PrAge) && nodes[inode].age)
fprintf(fout, " @%.6f", nodes[inode].age);
......@@ -8544,10 +8552,9 @@ int ProcessNodeAnnotation(int *haslabel)
stree.nodes[i].fossil = j;
pch2 = strchr(pch, braces[0]);
if (pch2 == NULL) pch2 = strchr(pch, braces[1]);
if (pch2)
pch = pch2 + 1;
else
if (pch2 == NULL)
error2("did not find left brace");
pch = pch2 + 1;
switch (j) {
case (LOWER_F):
......@@ -8693,7 +8700,8 @@ int ProcessNodeAnnotation(int *haslabel)
for (i = 0, com.nbtype = 0; i < tree.nnode; i++) {
if (i == tree.root) continue;
j = (int)nodes[i].label;
if (j + 1 > com.nbtype) com.nbtype = j + 1;
if (j + 1 > com.nbtype)
com.nbtype = j + 1;
if (j<0 || j>tree.nbranch - 1)
error2("branch label in the tree (note labels start from 0 and are consecutive)");
}
......@@ -8953,13 +8961,17 @@ void copySptree(void)
void printStree(void)
{
int i, j, k;
int i, j, k, maxlname=10;
for (i = 0; i < stree.nspecies; i++)
if ((k = (int)strlen(stree.nodes[i].name)) > maxlname)
maxlname = k;
printf("\n************\nSpecies tree\nns = %d nnode = %d", stree.nspecies, stree.nnode);
printf("\n%7s%7s %-16s %8s %8s%16s\n", "father", "node", "name", "time", "sons", "fossil");
printf("\n%7s%7s %-*s %9s %8s%16s\n", "father", "node", maxlname+2, "name", "time", "sons", "fossil");
for (i = 0; i < stree.nnode; i++) {
printf("%7d%7d %-20s %7.5f",
stree.nodes[i].father + 1, i + 1, stree.nodes[i].name, stree.nodes[i].age);
printf("%7d%7d %-*s %8.5f",
stree.nodes[i].father + 1, i + 1, maxlname+6, stree.nodes[i].name, stree.nodes[i].age);
if (stree.nodes[i].nson)
printf(" (%2d %2d)", stree.nodes[i].sons[0] + 1, stree.nodes[i].sons[1] + 1);
......