Skip to content
Commits on Source (7)
......@@ -11,10 +11,13 @@
# USEREADLINE - link with readline libraries. Must have readline installed. Windows set to no.
# USEBOOST - link with boost libraries. Must install boost. Allows the make.contigs command to read .gz files.
# USEHDF5 - link with HDF5cpp libraries. Must install HDF5. Allows the biom.info command to read Biom format 2.0.
# USEGSL - link with GNU Scientific libraries. Must install GSL. Allows the estimiator.single command to find diversity estimates.
# HDF5_LIBRARY_DIR - location of HDF5 libraries
# HDF5_INCLUDE_DIR - location of HDF5 include files
# BOOST_LIBRARY_DIR - location of boost libraries
# BOOST_INCLUDE_DIR - location of boost include files
# GSL_LIBRARY_DIR - location of GSL libraries
# GSL_INCLUDE_DIR - location of GSL include files
# MOTHUR_FILES - The MOTHUR_FILES parameter is optional, but allows you to set a default location for mothur to look for input files it can't find. This is often used for reference files you want to store in one location separate from your data.
PREFIX := ${CURDIR}
......@@ -23,13 +26,16 @@ OPTIMIZE ?= yes
USEREADLINE ?= yes
USEBOOST ?= no
USEHDF5 ?= no
USEGSL ?= no
LOGFILE_NAME ?= no
BOOST_LIBRARY_DIR ?= "\"Enter_your_boost_library_path_here\""
BOOST_INCLUDE_DIR ?= "\"Enter_your_boost_include_path_here\""
HDF5_LIBRARY_DIR ?= "\"Enter_your_HDF5_library_path_here\""
HDF5_INCLUDE_DIR ?= "\"Enter_your_HDF5_include_path_here\""
GSL_LIBRARY_DIR ?= "\"Enter_your_GSL_library_path_here\""
GSL_INCLUDE_DIR ?= "\"Enter_your_GSL_include_path_here\""
MOTHUR_FILES="\"Enter_your_default_path_here\""
VERSION = "\"1.41.2\""
VERSION = "\"1.42.1\""
# Set a static logfile name
......@@ -77,6 +83,14 @@ CXXFLAGS += -DUSE_HDF5 -I ${HDF5_INCLUDE_DIR}
endif
#User specified GSL library
ifeq ($(strip $(USEGSL)),yes)
LDFLAGS += -L ${GSL_LIBRARY_DIR} -lgsl
CXXFLAGS += -DUSE_GSL -I ${GSL_INCLUDE_DIR}
endif
#
# INCLUDE directories for mothur
#
......
This diff is collapsed.
mothur (1.42.1-1) unstable; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
* Use secure URI in Homepage field.
-- Andreas Tille <tille@debian.org> Thu, 01 Aug 2019 19:24:04 +0200
mothur (1.41.21-1) unstable; urgency=medium
* Do not report pre-releases
......
......@@ -5,16 +5,16 @@ Uploaders: Steffen Moeller <moeller@debian.org>,
Tomasz Buchert <tomasz@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
gfortran,
libboost-iostreams-dev,
libopenmpi-dev,
libreadline-dev,
zlib1g-dev
Standards-Version: 4.3.0
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/mothur
Vcs-Git: https://salsa.debian.org/med-team/mothur.git
Homepage: http://www.mothur.org
Homepage: https://www.mothur.org
Package: mothur
Architecture: any
......
......@@ -2,8 +2,9 @@
USEREADLINE ?= yes
USEBOOST ?= yes
USEHDF5 ?= yes
USEGSL ?= yes
LOGFILE_NAME ?= yes
VERSION = "\"1.41.0\""
VERSION = "\"1.42.1\""
# Optimize to level 3:
CXXFLAGS += -O3 -std=c++11
......@@ -69,6 +70,17 @@ ifeq ($(strip $(USEHDF5)),yes)
CXXFLAGS += -DUSE_HDF5 -I ${HDF5_INCLUDE_DIR}
endif
#User specified GSL library
ifeq ($(strip $(USEGSL)),yes)
GSL_LIBRARY_DIR ?= "\"/usr/local/gsl/lib\""
GSL_INCLUDE_DIR ?= "\"/usr/local/gsl/include\""
LDFLAGS += -L ${GSL_LIBRARY_DIR}
LIBS += ${GSL_LIBRARY_DIR}/libgsl.a
CXXFLAGS += -DUSE_GSL -I ${GSL_INCLUDE_DIR}
endif
#
# INCLUDE directories for mothur
......
This diff is collapsed.
//
// diversityutils.hpp
// Mothur
//
// Created by Sarah Westcott on 4/11/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#ifndef diversityutils_hpp
#define diversityutils_hpp
#define HI_PRECISION 1.0e-12
#define LO_PRECISION 1.0e-7
#define V_MULT 25.0
#define PENALTY 1.0e20
#define SLICE 10
#include "mothurout.h"
#include "utils.hpp"
#include "sabundvector.hpp"
/***********************************************************************/
class DiversityUtils {
public:
DiversityUtils(string met){ m = MothurOut::getInstance(); method = met; }
#ifdef USE_GSL
double logLikelihood(int n, double dAlpha, double dBeta);
int bessel(double* pdResult, int n, double dAlpha, double dBeta);
double sd(int n, double dAlpha, double dBeta);
double logLikelihood(int n, double dAlpha, double dBeta, double);
int bessel(double* pdResult, int n, double dAlpha, double dBeta, double);
double sd(int n, double dAlpha, double dBeta, double);
int minimiseSimplex(gsl_vector* ptX, size_t nP, void* pvData, double (*f)(const gsl_vector*, void* params), double, double, double);
void mcmc(t_Params *ptParams, t_Data *ptData, gsl_vector* ptX, void* f (void * pvInitMetro));
void outputResults(gsl_vector *ptX, t_Data *ptData, double (*f)(const gsl_vector*, void* params));
void getProposal(gsl_rng *ptGSLRNG, gsl_vector *ptXDash, gsl_vector *ptX, int* pnSDash, int nS, t_Params *ptParams);
int solveF(double x_lo, double x_hi, void* params, double tol, double *xsolve);
double logLikelihoodRampal(int n, double dMDash, double dV);
double logLikelihoodQuad(int n, double dMDash, double dV);
#endif
double f2X(double x, double dA, double dB, double dNDash);
double fX(double x, double dA, double dB, double dNDash);
double chao(t_Data *ptData);
void loadAbundance(t_Data *ptData, SAbundVector* rank);
void freeAbundance(t_Data *ptData);
private:
Utils util;
MothurOut* m;
string method;
};
/***********************************************************************/
#endif /* diversityutils_hpp */
//
// erarefaction.cpp
// Mothur
//
// Created by Sarah Westcott on 4/3/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#include "erarefaction.hpp"
/***********************************************************************/
double ERarefaction::getValues(SAbundVector* rank, int n){
try {
int maxRank = rank->getMaxRank();
int sampled = rank->getNumSeqs(); //nl
int numOTUs = rank->getNumBins(); //ns
double dSum = 0.0;
#ifdef USE_GSL
double dDenom = gsl_sf_lnchoose(sampled, n);
for(int i = 1; i <= maxRank; i++){
if (m->getControl_pressed()) { break; }
int abund = rank->get(i);
if (abund != 0) {
int thisRank = i; //nA
if(sampled - thisRank >= n){
double dNumer = gsl_sf_lnchoose(sampled - thisRank, n);
dSum += ((double) abund)*exp(dNumer - dDenom);
}
}
}
#endif
double result = ((double) numOTUs) - dSum;
if (isnan(result) || isinf(result)) { result = 0; }
return result;
}
catch(exception& e) {
m->errorOut(e, "ERarefaction", "getValues");
exit(1);
}
}
/***********************************************************************/
//
// erarefaction.hpp
// Mothur
//
// Created by Sarah Westcott on 4/3/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#ifndef erarefaction_hpp
#define erarefaction_hpp
#include "mothurout.h"
#include "sabundvector.hpp"
/***********************************************************************/
class ERarefaction {
public:
ERarefaction(){ m = MothurOut::getInstance(); }
double getValues(SAbundVector* rank, int n);
bool requiresSample() { return false; }
private:
Utils util;
MothurOut* m;
};
/***********************************************************************/
#endif /* erarefaction_hpp */
......@@ -101,16 +101,23 @@ public:
void getFreqs(Sequence seq) {
string curAligned = seq.getAligned(); numSeqs++;
for(int j=0;j<alignmentLength;j++){
if(toupper(curAligned[j]) == 'A') { a[j]++; }
else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
else if(toupper(curAligned[j]) == 'G') { g[j]++; }
else if(toupper(curAligned[j]) == 'C') { c[j]++; }
else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
}
}
string curAligned = seq.getAligned();
getFreqs(curAligned);
}
void getFreqs(string seq) {
string curAligned = seq; numSeqs++;
for(int j=0;j<alignmentLength;j++){
if(toupper(curAligned[j]) == 'A') { a[j]++; }
else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
else if(toupper(curAligned[j]) == 'G') { g[j]++; }
else if(toupper(curAligned[j]) == 'C') { c[j]++; }
else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
}
}
protected:
string filter;
......
//
// igabundance.cpp
// Mothur
//
// Created by Sarah Westcott on 4/3/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#include "igabundance.hpp"
/***********************************************************************/
vector<double> IGAbundance::getValues(SAbundVector* rank, vector<mcmcSample>& sampling) {
try {
int maxRank = rank->getMaxRank(); //nMax
maxRank = floor(pow(2.0,ceil(log((double) maxRank)/log(2.0)) + 2.0) + 1.0e-7);
vector<double> results; results.resize(maxRank, 0.0);
int nSamples = sampling.size();
if (nSamples == 0) { return results; }
#ifdef USE_GSL
DiversityUtils dutils("igabund");
for(int i = 0; i < sampling.size(); i++) {
if (m->getControl_pressed()) { break; }
for (int j = 1; j <= maxRank; j++) {
int nA = j;
double dLog = 0.0, dP = 0.0;
dLog = dutils.logLikelihood(nA, sampling[i].alpha, sampling[i].beta);
dP = exp(dLog);
results[j - 1] += dP*sampling[i].ns;
}
}
for (int i = 1; i<=maxRank; i++) {
results[i-1] /= (double)nSamples;
if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; }
}
#endif
return results;
}
catch(exception& e) {
m->errorOut(e, "IGAbundance", "getValues");
exit(1);
}
}
/***********************************************************************/
//
// igabundance.hpp
// Mothur
//
// Created by Sarah Westcott on 4/3/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#ifndef igabundance_hpp
#define igabundance_hpp
#include "diversityutils.hpp"
/***********************************************************************/
class IGAbundance {
public:
IGAbundance() { m = MothurOut::getInstance(); }
vector<double> getValues(SAbundVector* rank, vector<mcmcSample>& sampling);
bool requiresSample() { return true; }
private:
Utils util;
MothurOut* m;
};
/***********************************************************************/
#endif /* igabundance_hpp */
//
// igrarefaction.cpp
// Mothur
//
// Created by Sarah Westcott on 5/6/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#include "igrarefaction.hpp"
/***********************************************************************/
int compare_doubles(const void* a, const void* b)
{
double* arg1 = (double *) a;
double* arg2 = (double *) b;
if( *arg1 < *arg2 ) return -1;
else if( *arg1 == *arg2 ) return 0;
else return 1;
}
#ifdef USE_GSL
/***********************************************************************/
double IGRarefaction::calcMu(t_IGParams *ptIGParams)
{
double dMu = 0.0;
ptIGParams->dC = coverage;
DiversityUtils dutils("igrarefaction");
dutils.solveF(1.0, 1.0e10, ptIGParams, 1.0e-7, &dMu);
return dMu;
}
#endif
/***********************************************************************/
vector<double> IGRarefaction::getValues(SAbundVector* rank, vector<mcmcSample>& sampling){
try {
t_Data tData;
vector<double> results;
#ifdef USE_GSL
DiversityUtils dutils("igrarefaction");
dutils.loadAbundance(&tData, rank);
int sampled = rank->getNumSeqs(); //nj
int numOTUs = rank->getNumBins(); //nl
int nSamples = sampling.size();
double* adMu = NULL;
double dLower = 0.0, dMedian = 0.0, dUpper = 0.0;
gsl_set_error_handler_off();
t_IGParams* atIGParams;
atIGParams = (t_IGParams *) malloc(nSamples*sizeof(t_IGParams)); //MAX_SAMPLES
//load sampling data
for (int i = 0; i < nSamples; i++) {
if (m->getControl_pressed()) { return results; }
atIGParams[i].dAlpha = sampling[i].alpha;
atIGParams[i].dBeta = sampling[i].beta;
atIGParams[i].nS = sampling[i].ns;
}
adMu = (double *) malloc(sizeof(double)*nSamples);
//printf("numSample = %d ",nSamples);
for(int i = 0; i < nSamples; i++){
adMu[i] = ((double) sampled)*calcMu(&atIGParams[i]);
//printf("%f\n", adMu[i]);
//fflush(stdout);
}
qsort(adMu, nSamples, sizeof(double), compare_doubles);
dLower = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.025);
dMedian = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.5);
dUpper = gsl_stats_quantile_from_sorted_data(adMu, 1, nSamples, 0.975);
m->mothurOut("\nIGRarefaction - d_Lower = " + toString(dLower) + " d_Median = " + toString(dMedian) + " d_Upper = " + toString(dUpper) + "\n\n");
if (isnan(dLower) || isinf(dLower)) { dLower = 0; }
if (isnan(dMedian) || isinf(dMedian)) { dMedian = 0; }
if (isnan(dUpper) || isinf(dUpper)) { dUpper = 0; }
results.push_back(dLower); results.push_back(dMedian); results.push_back(dUpper);
free(adMu);
dutils.freeAbundance(&tData);
#endif
return results;
}
catch(exception& e) {
m->errorOut(e, "IGRarefaction", "getValues");
exit(1);
}
}
/***********************************************************************/
//
// igrarefaction.hpp
// Mothur
//
// Created by Sarah Westcott on 5/6/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#ifndef igrarefaction_hpp
#define igrarefaction_hpp
#include "diversityutils.hpp"
//IGRarefaction
/***********************************************************************/
class IGRarefaction {
public:
IGRarefaction(double c) : coverage(c) { m = MothurOut::getInstance(); }
~IGRarefaction() {}
vector<double> getValues(SAbundVector* rank, vector<mcmcSample>& sampling);
bool requiresSample() { return true; }
private:
Utils util;
MothurOut* m;
double coverage;
double calcMu(t_IGParams *ptIGParams);
};
/***********************************************************************/
#endif /* igrarefaction_hpp */
//
// lnabundace.cpp
// Mothur
//
// Created by Sarah Westcott on 5/8/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#include "lnabundance.hpp"
double f1_0(double x, void *pvParams)
{
t_LNParams *ptLNParams = (t_LNParams *) pvParams;
double dMDash = ptLNParams->dMDash, dV = ptLNParams->dV, n = ptLNParams->n;
double dTemp = (x - dMDash);
double dExp = x*((double) n) - exp(x) - 0.5*((dTemp*dTemp)/dV);
double dRet = exp(dExp);
return dRet;
}
#ifdef USE_GSL
double logLikelihoodRampal2(int n, double dMDash, double dV)
{
double dN = (double) n;
double dLogLik = 0.0, dTemp = gsl_pow_int(log(dN) - dMDash,2), dTemp3 = gsl_pow_int(log(dN) - dMDash,3);
dLogLik = -0.5*log(2.0*M_PI*dV) - log(dN) - (dTemp/(2.0*dV));
dLogLik += log(1.0 + 1.0/(2.0*dN*dV)*(dTemp/dV + log(dN) - dMDash - 1.0)
+ 1.0/(6.0*dN*dN*dV*dV*dV)*(3.0*dV*dV - (3.0*dV - 2.0*dV*dV)*(dMDash - log(dN))
- 3.0*dV*dTemp + dTemp3));
return dLogLik;
}
double derivExponent1(double x, void *pvParams)
{
t_LNParams *ptLNParams = (t_LNParams *) pvParams;
double dMDash = ptLNParams->dMDash, dV = ptLNParams->dV, n = ptLNParams->n;
double dTemp = (x - dMDash)/dV, dRet = 0.0;
dRet = ((double) n) - exp(x) - dTemp;
return dRet;
}
int solveF(double x_lo, double x_hi, double (*f)(double, void*),
void* params, double tol, double *xsolve)
{
int status, iter = 0, max_iter = 100;
const gsl_root_fsolver_type *T;
gsl_root_fsolver *s;
double r = 0;
gsl_function F;
F.function = f;
F.params = params;
T = gsl_root_fsolver_brent;
s = gsl_root_fsolver_alloc (T);
gsl_root_fsolver_set (s, &F, x_lo, x_hi);
do{
iter++;
status = gsl_root_fsolver_iterate (s);
r = gsl_root_fsolver_root (s);
x_lo = gsl_root_fsolver_x_lower (s);
x_hi = gsl_root_fsolver_x_upper (s);
status = gsl_root_test_interval (x_lo, x_hi, 0, tol);
}
while (status == GSL_CONTINUE && iter < max_iter);
(*xsolve) = gsl_root_fsolver_root (s);
gsl_root_fsolver_free (s);
return status;
}
double logLikelihoodQuad2(int n, double dMDash, double dV)
{
gsl_integration_workspace *ptGSLWS =
gsl_integration_workspace_alloc(1000);
double dLogFac1 = 0.0, dLogFacN = 0.0;
double dResult = 0.0, dError = 0.0, dPrecision = 0.0;
gsl_function tGSLF;
t_LNParams tLNParams;
double dEst = dMDash + ((double)n)*dV, dA = 0.0, dB = 0.0;
tLNParams.n = n; tLNParams.dMDash = dMDash; tLNParams.dV = dV;
tGSLF.function = &f1_0;
tGSLF.params = (void *) &tLNParams;
dLogFac1 = log(2.0*M_PI*dV);
//DiversityUtils dutils("lnabund");
if(n < 50){
dLogFacN = gsl_sf_fact(n);
dLogFacN = log(dLogFacN);
}
else{
dLogFacN = gsl_sf_lngamma(((double) n) + 1.0);
}
if(dEst > dV){
double dMax = 0.0;
double dUpper = (((double) n) + (dMDash/dV) - 1.0)/(1.0 + 1.0/dV);
double dVar = 0.0;
solveF(0.0, dUpper, derivExponent1, (void *) &tLNParams, 1.0e-5, &dMax);
dVar = sqrt(1.0/((1.0/dV) + exp(dMax)));
dA = dMax - V_MULT*dVar; dB = dMax + V_MULT*dVar;
}
else{
double dMax = 0.0;
double dLower = dEst - dV;
double dUpper = (((double) n) + (dMDash/dV) - 1.0)/(1.0 + 1.0/dV);
double dVar = 0.0;
solveF(dLower, dUpper,derivExponent1, (void *) &tLNParams, 1.0e-5, &dMax);
dVar = sqrt(1.0/((1.0/dV) + exp(dMax)));
dA = dMax - V_MULT*dVar; dB = dMax + V_MULT*dVar;
}
if(n < 10){
dPrecision = HI_PRECISION;
}
else{
dPrecision = LO_PRECISION;
}
printf("dA = %f dB = %f dPrecision = %f, GSL_INTEG_GAUSS61 %d maxlevel %zu \n", dA, dB, dPrecision, GSL_INTEG_GAUSS61, ptGSLWS->maximum_level);
printf("alist = %f blist = %f rlist = %f elist = %f order = %zu level = %zu \n", *ptGSLWS->alist, *ptGSLWS->blist, *ptGSLWS->rlist, *ptGSLWS->elist, *ptGSLWS->order, *ptGSLWS->level);
gsl_integration_qag(&tGSLF, dA, dB, dPrecision, 0.0, 1000, GSL_INTEG_GAUSS61, ptGSLWS, &dResult, &dError);
gsl_integration_workspace_free(ptGSLWS);
return log(dResult) - dLogFacN -0.5*dLogFac1;
}
#endif
/***********************************************************************/
vector<double> LNAbundance::getValues(SAbundVector* rank, vector<mcmcSample>& sampling) {
try {
int maxRank = rank->getMaxRank(); //nMax
maxRank = floor(pow(2.0,ceil(log((double) maxRank)/log(2.0)) + 2.0) + 1.0e-7);
vector<double> results; results.resize(maxRank, 0.0);
int nSamples = sampling.size();
if (nSamples == 0) { return results; }
printf("M_PI = %f\n",M_PI);
#ifdef USE_GSL
DiversityUtils dutils("lnabund");
printf("nMax = %d \n",maxRank);
for(int i = 0; i < sampling.size(); i++) {
if (m->getControl_pressed()) { break; }
printf("dmDash = %f dV = %f \n",sampling[i].alpha, sampling[i].beta);
for (int j = 1; j <= maxRank; j++) {
int nA = j;
double dLog = 0.0, dP = 0.0;
if(nA < 100){ //MAX_QUAD
dLog = logLikelihoodQuad2(nA, sampling[i].alpha, sampling[i].beta); //nA, dMDash, dV
}
else{
dLog = logLikelihoodRampal2(nA, sampling[i].alpha, sampling[i].beta); //nA, dMDash, dV
}
dP = exp(dLog);
results[j - 1] += dP*sampling[i].ns;
cout << j << '\t' << dP << '\t' << results[j-1] << endl;
}
}
for (int i = 1; i<=maxRank; i++) {
results[i-1] /= (double)nSamples;
if (isnan(results[i-1]) || isinf(results[i-1])) { results[i-1] = 0.0; }
cout <<i << '\t' << results[i-1] << endl;
}
#endif
return results;
}
catch(exception& e) {
m->errorOut(e, "LNAbundance", "getValues");
exit(1);
}
}
/***********************************************************************/
//
// lnabundace.hpp
// Mothur
//
// Created by Sarah Westcott on 5/8/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#ifndef lnabundace_hpp
#define lnabundace_hpp
#include "diversityutils.hpp"
/***********************************************************************/
class LNAbundance {
public:
LNAbundance() { m = MothurOut::getInstance(); }
vector<double> getValues(SAbundVector* rank, vector<mcmcSample>& sampling);
bool requiresSample() { return true; }
private:
Utils util;
MothurOut* m;
};
/***********************************************************************/
#endif /* lnabundace_hpp */
//
// metroig.cpp
// Mothur
//
// Created by Sarah Westcott on 4/8/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#include "metroig.hpp"
/*constants for simplex minimisation*/
#ifdef USE_GSL
/***********************************************************************/
double nLogLikelihood0(const gsl_vector * x, void * params)
{
double dAlpha = gsl_vector_get(x,0), dBeta = gsl_vector_get(x,1);
int nS = (int) floor(gsl_vector_get(x, 2));
t_Data *ptData = (t_Data *) params;
int i = 0;
double dLogL = 0.0;
double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0;
if(dAlpha <= 0.0 || dBeta <= 0.0){
return PENALTY;
}
DiversityUtils dutils("metroig");
for(i = 0; i < ptData->nNA; i++){
double dLogP = 0.0;
int nA = ptData->aanAbund[i][0];
dLogP = dutils.logLikelihood(nA, dAlpha, dBeta);
dLogL += ((double) ptData->aanAbund[i][1])*dLogP;
dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]);
}
dLog0 = dutils.logLikelihood(0, dAlpha, dBeta);
dLog1 = (nS - ptData->nL)*dLog0;
dLog2 = - gsl_sf_lnfact(nS - ptData->nL);
dLog3 = gsl_sf_lnfact(nS);
dLogL += dLog1 + dLog2 + dLog3;
/*return*/
return -dLogL;
}
/***********************************************************************/
double negLogLikelihood0(double dAlpha, double dBeta, int nS, void * params)
{
t_Data *ptData = (t_Data *) params;
int i = 0;
double dLogL = 0.0;
double dLog0 = 0.0, dLog1 = 0.0, dLog2 = 0.0, dLog3 = 0.0;
if(dAlpha <= 0.0 || dBeta <= 0.0){
return PENALTY;
}
DiversityUtils dutils("metroig");
for(i = 0; i < ptData->nNA; i++){
double dLogP = 0.0;
int nA = ptData->aanAbund[i][0];
dLogP = dutils.logLikelihood(nA, dAlpha, dBeta);
dLogL += ((double) ptData->aanAbund[i][1])*dLogP;
dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]);
}
dLog0 = dutils.logLikelihood(0, dAlpha, dBeta);
dLog1 = (nS - ptData->nL)*dLog0;
dLog2 = - gsl_sf_lnfact(nS - ptData->nL);
dLog3 = gsl_sf_lnfact(nS);
dLogL += dLog1 + dLog2 + dLog3;
/*return*/
return -dLogL;
}
/***********************************************************************/
void* metropolis0 (void * pvInitMetro)
{
t_MetroInit *ptMetroInit = (t_MetroInit *) pvInitMetro;
gsl_vector *ptX = ptMetroInit->ptX;
t_Data *ptData = ptMetroInit->ptData;
t_Params *ptParams = ptMetroInit->ptParams;
gsl_vector *ptXDash = gsl_vector_alloc(3); /*proposal*/
char *szSampleFile = (char *) malloc(1024*sizeof(char));
const gsl_rng_type *T;
gsl_rng *ptGSLRNG;
//FILE *sfp = NULL;
int nS = 0, nSDash = 0, nIter = 0;
double dRand = 0.0, dNLL = 0.0;
void *pvRet = NULL;
/*set up random number generator*/
T = gsl_rng_default;
ptGSLRNG = gsl_rng_alloc (T);
nS = (int) floor(gsl_vector_get(ptX,2));
dNLL = negLogLikelihood0(gsl_vector_get(ptX,0), gsl_vector_get(ptX,1), nS,(void*) ptData);
string filename = ptParams->szOutFileStub + "_" + toString(ptMetroInit->nThread) + ".sample";
ofstream out; Utils util; util.openOutputFile(filename, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
/*seed random number generator*/
gsl_rng_set(ptGSLRNG, ptMetroInit->lSeed);
DiversityUtils dutils("metroig");
/*now perform simple Metropolis algorithm*/
while(nIter < ptParams->nIter){
double dA = 0.0, dNLLDash = 0.0;
dutils.getProposal(ptGSLRNG, ptXDash, ptX, &nSDash, nS, ptParams);
dNLLDash = negLogLikelihood0(gsl_vector_get(ptXDash,0), gsl_vector_get(ptXDash,1), nSDash, (void*) ptData);
dA = exp(dNLL - dNLLDash);
if(dA > 1.0){
dA = 1.0;
}
dRand = gsl_rng_uniform(ptGSLRNG);
if(dRand < dA){
gsl_vector_memcpy(ptX, ptXDash);
nS = nSDash;
dNLL = dNLLDash;
ptMetroInit->nAccepted++;
}
if(nIter % SLICE == 0){
out << nIter << "," << gsl_vector_get(ptX, 0) << "," << gsl_vector_get(ptX, 1) << "," << nS << "," << dNLL << endl;
}
nIter++;
}
out.close();
/*free up allocated memory*/
gsl_vector_free(ptXDash);
free(szSampleFile);
gsl_rng_free(ptGSLRNG);
return pvRet;
}
#endif
/***********************************************************************/
vector<string> MetroIG::getValues(SAbundVector* rank){
try {
t_Params tParams; tParams.nIter = nIters; tParams.dSigmaX = sigmaA; tParams.dSigmaY = sigmaB; tParams.dSigmaS = sigmaS; tParams.szOutFileStub = outFileStub; tParams.lSeed = m->getRandomSeed();
t_Data tData;
#ifdef USE_GSL
DiversityUtils dutils("metroig");
dutils.loadAbundance(&tData, rank);
gsl_vector* ptX = gsl_vector_alloc(3); /*parameter estimates*/
int sampled = rank->getNumSeqs(); //nj
int numOTUs = rank->getNumBins(); //nl
gsl_rng_env_setup();
gsl_set_error_handler_off();
/*set initial estimates for parameters*/
gsl_vector_set(ptX, 0, 1.0);
gsl_vector_set(ptX, 1, 5.0);
gsl_vector_set(ptX, 2, numOTUs*2);
double chaoResult = dutils.chao(&tData);
m->mothurOut("\nMetroIG - D = " + toString(numOTUs) + " L = " + toString(sampled) + " Chao = " + toString(chaoResult) + "\n");
dutils.minimiseSimplex(ptX, 3, (void*) &tData, &nLogLikelihood0, 0.1, 1.0e-2, 100000);
dutils.outputResults(ptX, &tData, &nLogLikelihood0);
if(tParams.nIter > 0){ dutils.mcmc(&tParams, &tData, ptX, &metropolis0); }
/*free up allocated memory*/
gsl_vector_free(ptX);
dutils.freeAbundance(&tData);
#endif
vector<string> outputs;
outputs.push_back(outFileStub + "_0.sample");
outputs.push_back(outFileStub + "_1.sample");
outputs.push_back(outFileStub + "_2.sample");
return outputs;
}
catch(exception& e) {
m->errorOut(e, "MetroIG", "getValues");
exit(1);
}
}
/***********************************************************************/
//
// metroig.hpp
// Mothur
//
// Created by Sarah Westcott on 4/8/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#ifndef metroig_hpp
#define metroig_hpp
#include "diversityutils.hpp"
/***********************************************************************/
class MetroIG {
public:
MetroIG(double sigA, double sigB, double sigS, int n, string stub) : sigmaA(sigA), sigmaB(sigB), sigmaS(sigS), nIters(n), outFileStub(stub) { m = MothurOut::getInstance(); }
vector<string> getValues(SAbundVector* rank);
bool requiresSample() { return false; }
private:
Utils util;
MothurOut* m;
double sigmaA, sigmaB, sigmaS;
int nIters;
string outFileStub;
};
/***********************************************************************/
#endif /* metroig_hpp */
//
// metrolognormal.c
// Mothur
//
// Created by Sarah Westcott on 4/25/19.
// Copyright © 2019 Schloss Lab. All rights reserved.
//
#include "metrolognormal.hpp"
/*constants for calculated compound Poisson lognormal*/
#ifdef USE_GSL
/***********************************************************************/
double nLogLikelihood1(const gsl_vector * x, void * params)
{
double dMDash = gsl_vector_get(x,0), dV = gsl_vector_get(x,1);
int nS = (int) floor(gsl_vector_get(x, 2));
t_Data *ptData = (t_Data *) params;
int i = 0;
double dLogL = 0.0;
DiversityUtils dutils("metroln");
for(i = 0; i < ptData->nNA; i++){
double dLogP = 0.0;
int nA = ptData->aanAbund[i][0];
if(nA < 100){
dLogP = dutils.logLikelihoodQuad(nA, dMDash, dV);
}
else{
dLogP = dutils.logLikelihoodRampal(nA, dMDash, dV);
}
dLogL += ((double) ptData->aanAbund[i][1])*dLogP;
dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]);
}
dLogL += (nS - ptData->nL)*dutils.logLikelihoodQuad(0, dMDash, dV);
dLogL -= gsl_sf_lnfact(nS - ptData->nL);
dLogL += gsl_sf_lnfact(nS);
/*return*/
return -dLogL;
}
/***********************************************************************/
double negLogLikelihood1(double dMDash, double dV, int nS, void * params)
{
t_Data *ptData = (t_Data *) params;
int i = 0;
double dLog0 = 0.0, dLogL = 0.0;
DiversityUtils dutils("metroln");
for(i = 0; i < ptData->nNA; i++){
double dLogP = 0.0;
int nA = ptData->aanAbund[i][0];
if(nA < 100){
dLogP = dutils.logLikelihoodQuad(nA, dMDash, dV);
}
else{
dLogP = dutils.logLikelihoodRampal(nA, dMDash, dV);
}
dLogL += ((double) ptData->aanAbund[i][1])*dLogP;
dLogL -= gsl_sf_lnfact(ptData->aanAbund[i][1]);
}
dLog0 = dutils.logLikelihoodQuad(0, dMDash, dV);
if(nS > ptData->nL){
dLogL += (nS - ptData->nL)*dLog0;
}
dLogL -= gsl_sf_lnfact(nS - ptData->nL);
dLogL += gsl_sf_lnfact(nS);
/*return*/
return -dLogL;
}
/***********************************************************************/
void* metropolis1 (void * pvInitMetro)
{
t_MetroInit *ptMetroInit = (t_MetroInit *) pvInitMetro;
gsl_vector *ptX = ptMetroInit->ptX;
t_Data *ptData = ptMetroInit->ptData;
t_Params *ptParams = ptMetroInit->ptParams;
gsl_vector *ptXDash = gsl_vector_alloc(3); /*proposal*/
const gsl_rng_type *T;
gsl_rng *ptGSLRNG;
int nS = 0, nSDash = 0,nIter = 0;
double dRand = 0.0, dNLL = 0.0;
void *pvRet = NULL;
double dM = 0.0, dV = 0.0;
double dMDash = 0.0, dVDash = 0.0;
double dXDash = 0.0, dX = 0.0;
/*set up random number generator*/
T = gsl_rng_default;
ptGSLRNG = gsl_rng_alloc (T);
nS = (int) floor(gsl_vector_get(ptX,2));
dNLL = negLogLikelihood1(gsl_vector_get(ptX,0), gsl_vector_get(ptX,1), nS,(void*) ptData);
dM = gsl_vector_get(ptX,0); dV = gsl_vector_get(ptX,1);
gsl_vector_set(ptX,0,dM + 0.5*dV);
string filename = ptParams->szOutFileStub + "_" + toString(ptMetroInit->nThread) + ".sample";
ofstream out; Utils util; util.openOutputFile(filename, out);
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
/*seed random number generator*/
gsl_rng_set(ptGSLRNG, ptMetroInit->lSeed);
DiversityUtils dutils("metroln");
/*now perform simple Metropolis algorithm*/
while(nIter < ptParams->nIter){
double dA = 0.0, dNLLDash = 0.0;
dutils.getProposal(ptGSLRNG, ptXDash, ptX, &nSDash, nS,ptParams);
dXDash = gsl_vector_get(ptXDash,0); dVDash = gsl_vector_get(ptXDash,1);
dMDash = dXDash - 0.5*dVDash;
dNLLDash = negLogLikelihood1(dMDash, dVDash, nSDash, (void*) ptData);
dA = exp(dNLL - dNLLDash);
if(dA > 1.0){
dA = 1.0;
}
dRand = gsl_rng_uniform(ptGSLRNG);
if(dRand < dA){
ptMetroInit->nAccepted++;
gsl_vector_memcpy(ptX, ptXDash);
nS = nSDash;
dNLL = dNLLDash;
}
if(nIter % 10 == 0){
dX = gsl_vector_get(ptX,0); dV = gsl_vector_get(ptX,1);
dM = dX - 0.5*dV;
out << nIter << "," << dM << "," << dV << "," << nS << "," << dNLL << endl;
}
nIter++;
}
out.close();
/*free up allocated memory*/
gsl_vector_free(ptXDash);
gsl_rng_free(ptGSLRNG);
return pvRet;
}
#endif
/***********************************************************************/
vector<string> MetroLogNormal::getValues(SAbundVector* rank){
try {
t_Params tParams; tParams.nIter = nIters; tParams.dSigmaX = sigmaX; tParams.dSigmaY = sigmaY; tParams.dSigmaS = sigmaS; tParams.szOutFileStub = outFileStub; tParams.lSeed = m->getRandomSeed();
t_Data tData;
#ifdef USE_GSL
DiversityUtils dutils("metroln");
dutils.loadAbundance(&tData, rank);
gsl_vector* ptX = gsl_vector_alloc(3);
gsl_rng_env_setup();
gsl_set_error_handler_off();
dutils.loadAbundance(&tData, rank);
int sampled = rank->getNumSeqs(); //nj
int numOTUs = rank->getNumBins(); //nl
gsl_vector_set(ptX, 0, 1.0); //INIT_M_DASH
gsl_vector_set(ptX, 1, 1.0); //INIT_V
gsl_vector_set(ptX, 2, numOTUs*2);
double chaoResult = dutils.chao(&tData);
m->mothurOut("\nMetroLogNormal - D = " + toString(numOTUs) + " L = " + toString(sampled) + " Chao = " + toString(chaoResult) + "\n");
dutils.minimiseSimplex(ptX, 3, (void*) &tData, &nLogLikelihood1, 1.0, 1.0e-2, 100000);
dutils.outputResults(ptX, &tData, &nLogLikelihood1);
if(tParams.nIter > 0){ dutils.mcmc(&tParams, &tData, ptX, &metropolis1); }
gsl_vector_free(ptX);
dutils.freeAbundance(&tData);
#endif
vector<string> outputs;
outputs.push_back(outFileStub + "_0.sample");
outputs.push_back(outFileStub + "_1.sample");
outputs.push_back(outFileStub + "_2.sample");
return outputs;
}
catch(exception& e) {
m->errorOut(e, "MetroLogNormal", "getValues");
exit(1);
}
}
/***********************************************************************/