Skip to content
Commits on Source (6)
......@@ -27,10 +27,13 @@ env:
- METHOD_TEST=tests/hbltests/libv3/aBSREL.wbf
- METHOD_TEST=tests/hbltests/libv3/BUSTED.wbf
- METHOD_TEST=tests/hbltests/libv3/LEISR.wbf
- METHOD_TEST=tests/hbltests/libv3/BGM.wbf
language: c++
compiler:
- gcc
- clang
addons:
apt:
......@@ -39,17 +42,18 @@ addons:
- ubuntu-toolchain-r-test
- george-edison55-precise-backports
packages:
- g++-6
- gcc-6
- g++-7
- gcc-7
install:
- if [ "$CXX" = "g++" ]; then export CXX="g++-6" CC="gcc-6"; fi
- if [ "$CXX" = "g++" ]; then export CXX="g++-7" CC="gcc-7"; fi
- cmake .
- make HYPHYMP
- make HYPHYGTEST
script:
- export OMP_NUM_THREADS=4
- export OMP_NUM_THREADS=8
- export LD_LIBRARY_PATH=/usr/local/clang/lib:$LD_LIBRARY_PATH
- ./HYPHYGTEST
- ./HYPHYMP LIBPATH=`pwd`/res/ tests/hbltests/libv3/math.bf
- ./HYPHYMP LIBPATH=`pwd`/res/ tests/hbltests/libv3/iofunctions.bf
......
......@@ -20,9 +20,9 @@ macro(PCL_CHECK_FOR_SSE3)
include(CheckCXXSourceRuns)
set(CMAKE_REQUIRED_FLAGS)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_REQUIRED_FLAGS "-msse3")
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
check_cxx_source_runs("
#include <pmmintrin.h>
......@@ -46,9 +46,9 @@ macro(PCL_CHECK_FOR_AVX)
include(CheckCXXSourceRuns)
set(CMAKE_REQUIRED_FLAGS)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_REQUIRED_FLAGS "-march=corei7-avx -mtune=corei7-avx")
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
check_cxx_source_runs("
#include <immintrin.h>
......@@ -71,6 +71,7 @@ endmacro(PCL_CHECK_FOR_AVX)
set(CMAKE_MODULE_PATH cmake)
set(HYPHY_VERSION 2.1)
#-------------------------------------------------------------------------------
# setup the files we'll be using
#-------------------------------------------------------------------------------
......@@ -94,7 +95,6 @@ set(DEFAULT_WARNING_FLAGS "-w")
set(DEFAULT_DEBUG_WARNING_FLAGS "-Wall -Wno-int-to-pointer-cast -Wno-conversion-null -Wno-sign-compare -Wno-maybe-uninitialized")
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
execute_process(
COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
......@@ -175,6 +175,14 @@ if(NOT DEFINED DEFAULT_WARNING_FLAGS)
set(DEFAULT_WARNING_FLAGS "")
endif(NOT DEFINED DEFAULT_WARNING_FLAGS)
include (CheckLibraryExists)
CHECK_LIBRARY_EXISTS( "libamdlibm.so" "amd_log" "" HAS_AMD_LIBM)
if(HAS_AMD_LIBM)
set(DEFAULT_LINK_FLAGS "${DEFAULT_LINK_FLAGS} -lamdlibm")
add_definitions(-D__USEAMDLOG__)
endif(HAS_AMD_LIBM)
#-------------------------------------------------------------------------------
# OpenMP support
......@@ -188,8 +196,12 @@ endif(NOT ${OPENMP_FOUND})
#-------------------------------------------------------------------------------
# default installation prefix
#-------------------------------------------------------------------------------
set(INSTALL_PREFIX /usr/local CACHE PATH "Installation prefix")
set(CMAKE_INSTALL_PREFIX ${INSTALL_PREFIX} CACHE INTERNAL "Installation prefix" FORCE)
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
set(CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "Installation prefix")
endif()
if(NOT DEFINED INSTALL_PREFIX)
set(INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX} CACHE INTERNAL "Installation prefix" FORCE)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set(DEFAULT_LIBRARIES pthread)
else(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
......
hyphy (2.3.14+dfsg-1) unstable; urgency=medium
* New upstream version
* debhelper 11
* Standards-Version: 4.2.1
-- Andreas Tille <tille@debian.org> Wed, 26 Sep 2018 16:55:30 +0200
hyphy (2.3.13+dfsg-1) unstable; urgency=medium
* Team upload.
......
......@@ -9,7 +9,7 @@ Build-Depends: debhelper (>= 11~),
libcurl4-gnutls-dev | libcurl4-dev,
libssl-dev,
libsqlite3-dev
Standards-Version: 4.1.5
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/hyphy
Vcs-Git: https://salsa.debian.org/med-team/hyphy.git
Homepage: http://hyphy.org/
......
......@@ -8,14 +8,14 @@ Description: remove optimizations for the local machine
@@ -47,7 +47,7 @@ macro(PCL_CHECK_FOR_AVX)
set(CMAKE_REQUIRED_FLAGS)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- set(CMAKE_REQUIRED_FLAGS "-march=corei7-avx -mtune=corei7-avx")
+ set(CMAKE_REQUIRED_FLAGS "")
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG)
endif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
check_cxx_source_runs("
@@ -143,7 +143,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang
set(DEFAULT_COMPILE_FLAGS "-fsigned-char -O3")
@@ -124,7 +124,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMP
else(NOAVX)
PCL_CHECK_FOR_AVX()
if(${HAVE_AVX_EXTENSIONS})
- set(DEFAULT_COMPILE_FLAGS "${DEFAULT_COMPILE_FLAGS} -march=corei7-avx -mtune=corei7-avx")
......
This diff is collapsed.
......@@ -153,6 +153,8 @@ for (sequenceIndex = 0; sequenceIndex < all64.species; sequenceIndex = sequenceI
GetDataInfo (siteInfo, all64, sequenceIndex, siteIndex);
siteInfo1 = stopCodonTemplate*siteInfo;
siteInfo2 = nonStopCodonTemplate*siteInfo;
if (siteInfo1[0]>0 && siteInfo2[0] == 0)
{
sitesWithDeletions[siteIndex] = 1;
......
......@@ -227,9 +227,13 @@ if (utility.Has (fade.cache, terms.fade.cache.model, "String") && utility.Has (f
io.WriteCacheToFile (fade.path.cache, fade.cache);
}
fade.rebuild_lf = FALSE;
if (utility.Has (fade.cache, terms.fade.cache.baseline, "AssociativeList")) {
io.ReportProgressMessageMD ("FADE", "baseline", "Loaded baseline model fit from cache");
fade.baseline_fit = fade.cache [terms.fade.cache.baseline];
fade.rebuild_lf = TRUE;
} else {
selection.io.startTimer (fade.json [terms.json.timers], "Baseline Fit", 1);
io.ReportProgressMessageMD ("FADE", "baseline", "Fitting the baseline (`fade.baseline_model`) model to obtain branch lengths and rate matrix estimates");
......@@ -294,6 +298,19 @@ utility.ForEachPair (fade.filter_specification, "_key_", "_value_",
// TBD
if (utility.Has (fade.cache, terms.fade.cache.substitutions, "AssociativeList") == FALSE) {
if (fade.rebuild_lf) {
estimators.CreateLFObject ( "fade.ancestral.rebuild",
fade.filter_names,
fade.trees,
"fade.generator.MLE" ,
fade.baseline_fit,
{terms.run_options.retain_lf_object: TRUE},
None
)
fade.baseline_fit[terms.likelihood_function] = "fade.ancestral.rebuild.likelihoodFunction";
}
utility.EnsureKey (fade.cache, terms.fade.cache.substitutions);
utility.EnsureKey (fade.cache, terms.fade.cache.composition);
......
......@@ -254,7 +254,6 @@ if (utility.Has (fubar.cache, terms.fubar.cache.grid, "Matrix") && utility.Has (
estimators.ApplyExistingEstimates ("fubar.lf.codon", fubar.model_id_to_object, fubar.gtr_results, None);
estimators.TraverseLocalParameters ("fubar.lf.codon", fubar.model_id_to_object, "fubar.scalers.Constrain");
fubar.pass1 = Max (fubar.ComputeOnGrid ("fubar.lf.codon",
fubar.grid.MatrixToDict (fubar.grid.matrix),
"fubar.pass1.evaluator",
......@@ -721,6 +720,7 @@ lfunction fubar.scalers.SetBranchLength (model, value, parameter) {
parameters.RemoveConstraint (beta);
parameters.SetValue ("`parameter`.`alpha`", s);
parameters.SetValue ("`parameter`.`beta`", ^beta);
return 1;
}
//------------------------------------------------------------------------------
......
......@@ -467,7 +467,7 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa
^"meme.site_beta_nuisance" = 1;
//console.log ("Optimizing FEL for pattern " + pattern_info);
io.SpoolLF (lf_fel, "/tmp/meme.debug", "FEL");
//io.SpoolLF (lf_fel, "/tmp/meme.debug", "FEL");
Optimize (results, ^lf_fel);
fel = estimators.ExtractMLEs (lf_fel, model_mapping);
......@@ -483,8 +483,9 @@ lfunction meme.handle_a_site (lf_fel, lf_bsrel, filter_data, partition_index, pa
}
//console.log ("Optimizing MEME for pattern " + pattern_info);
io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME");
//io.SpoolLF (lf_bsrel, "/tmp/meme.debug", "MEME");
Optimize (results, ^lf_bsrel);
//console.log (results[1][0]);
alternative = estimators.ExtractMLEs (lf_bsrel, model_mapping);
alternative [utility.getGlobalValue("terms.fit.log_likelihood")] = results[1][0];
......
......@@ -24,16 +24,22 @@ LoadFunctionLibrary("modules/io_functions.ibf");
LoadFunctionLibrary("modules/selection_lib.ibf");
LoadFunctionLibrary("libv3/models/codon/BS_REL.bf");
LoadFunctionLibrary("libv3/convenience/math.bf");
LoadFunctionLibrary ("libv3/tasks/mpi.bf");
utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE);
utility.SetEnvVariable ("ASSUME_REVERSIBLE_MODELS", TRUE);
//utility.SetEnvVariable ("LF_SMOOTHING_SCALER", 0.01);
//utility.SetEnvVariable ("LF_SMOOTHING_REDUCTION", 1/2);
/*------------------------------------------------------------------------------*/
relax.analysis_description = {
terms.io.info : "RELAX (a random effects test of selection relaxation) uses a random effects branch-site model framework to test whether a set of 'Test' branches evolves under relaxed selection relative to a set of 'Reference' branches (R), as measured by the relaxation parameter (K).",
terms.io.version : "2.0",
terms.io.info : "RELAX (a random effects test of selection relaxation) uses a random effects branch-site model framework to test whether a set of 'Test' branches evolves under relaxed selection relative to a set of 'Reference' branches (R), as measured by the relaxation parameter (K).
Version 2.1 adds a check for stability in K estimates to try to mitigate convergence problems",
terms.io.version : "2.1",
terms.io.reference : "RELAX: Detecting Relaxed Selection in a Phylogenetic Framework (2015). Mol Biol Evol 32 (3): 820-832",
terms.io.authors : "Sergei L Kosakovsky Pond, Ben Murrell, Steven Weaver and Temple iGEM / UCSD viral evolution group",
terms.io.contact : "spond@temple.edu",
......@@ -49,6 +55,8 @@ relax.json = { terms.json.analysis: relax.analysis_description,
relax.relaxation_parameter = "relax.K";
relax.rate_classes = 3;
relax.MG94_name = terms.json.mg94xrev_sep_rates;
relax.general_descriptive_name = "General descriptive";
relax.alternative_name = "RELAX alternative";
......@@ -62,6 +70,11 @@ terms.relax.k_range = {
terms.upper_bound: "50"
};
terms.relax.k_range1 = {
terms.lower_bound: "1",
terms.upper_bound: "50"
};
relax.p_threshold = 0.05;
relax.test_branches_name = "Test";
......@@ -90,6 +103,7 @@ selection.io.startTimer (relax.json [terms.json.timers], "Overall", 0);
namespace relax {
LoadFunctionLibrary ("modules/shared-load-file.bf");
load_file ({utility.getGlobalValue("terms.prefix"): "relax", utility.getGlobalValue("terms.settings") : {utility.getGlobalValue("terms.settings.branch_selector") : "relax.select_branches"}});
LoadFunctionLibrary ("modules/grid_compute.ibf");
}
......@@ -136,12 +150,13 @@ namespace relax {
io.ReportProgressMessageMD ("RELAX", "codon-refit", "Improving branch lengths, nucleotide substitution biases, and global dN/dS ratios under a full codon model");
relax.final_partitioned_mg_results = estimators.FitMGREV (relax.filter_names, relax.trees, relax.codon_data_info [terms.code], {
terms.run_options.model_type: terms.local,
terms.run_options.partitioned_omega: relax.selected_branches,
}, relax.partitioned_mg_results);
io.ReportProgressMessageMD("RELAX", "codon-refit", "* " + selection.io.report_fit (relax.final_partitioned_mg_results, 0, relax.codon_data_info[terms.data.sample_size]));
relax.global_dnds = selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio);
......@@ -175,9 +190,14 @@ utility.ForEachPair (relax.filter_specification, "_key_", "_value_",
selection.io.stopTimer (relax.json [terms.json.timers], "Preliminary model fitting");
parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50);
if (relax.model_set == "All") { // run all the models
relax.ge_guess = None;
while (1) {
relax.ge.bsrel_model = model.generic.DefineMixtureModel("relax.BS_REL.ModelDescription",
"relax.ge", {
"0": parameters.Quote(terms.local),
......@@ -191,14 +211,24 @@ if (relax.model_set == "All") { // run all the models
parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)), terms.range_almost_01);
}
parameters.SetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.rate_classes)), terms.range_gte1);
/*
for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) {
console.log (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)));
console.log (
parameters.GetRange (model.generic.GetGlobalParameter (relax.ge.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)))
);
}
*/
relax.model_object_map = { "relax.ge" : relax.ge.bsrel_model };
io.ReportProgressMessageMD ("RELAX", "gd", "Fitting the general descriptive (separate k per branch) model");
selection.io.startTimer (relax.json [terms.json.timers], "General descriptive model fitting", 2);
if (Type (relax.ge_guess) != "Matrix") {
relax.ge_guess = relax.DistributionGuess(utility.Map (selection.io.extract_global_MLE_re (relax.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"), "_value_",
"_value_[terms.fit.MLE]"));
}
relax.distribution = models.codon.BS_REL.ExtractMixtureDistribution(relax.ge.bsrel_model);
......@@ -210,17 +240,47 @@ if (relax.model_set == "All") { // run all the models
relax.final_partitioned_mg_results,
relax.model_object_map,
{
terms.run_options.apply_user_constraints: "relax.init.k"
terms.run_options.apply_user_constraints: "relax.init.k",
terms.run_options.retain_lf_object : TRUE
});
//Export (lfe, ^relax.general_descriptive.fit [terms.likelihood_function]);
//console.log (lfe);
estimators.TraverseLocalParameters (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map, "relax.set.k2");
relax.general_descriptive.fit = estimators.FitExistingLF (relax.general_descriptive.fit [terms.likelihood_function], relax.model_object_map);
selection.io.stopTimer (relax.json [terms.json.timers], "General descriptive model fitting");
io.ReportProgressMessageMD("RELAX", "ge", "* " + selection.io.report_fit (relax.general_descriptive.fit, 9, relax.codon_data_info[terms.data.sample_size]));
io.ReportProgressMessageMD("RELAX", "ge", "* The following baseline rate distribution for branch-site combinations was inferred");
relax.inferred_ge_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistributionFromFit (relax.ge.bsrel_model, relax.general_descriptive.fit)) % 0;
selection.io.report_dnds (relax.inferred_ge_distribution);
if (relax.rate_classes > 2) {
if (relax.inferred_ge_distribution[0][1] < 1e-5 || relax.inferred_ge_distribution[1][1] < 1e-5) {
io.ReportProgressMessageMD("RELAX", "ge", "\n ### Because some of the rate classes were collapsed to 0, the model is likely overparameterized. RELAX will reduce the number of site rate classes by one and repeat the fit now.\n----\n");
relax.rate_classes = relax.rate_classes - 1;
relax.ge_guess = {relax.rate_classes, 2};
relax.shift = 0;
//console.log (relax.inferred_ge_distribution);
for (relax.i = 0; relax.i < relax.rate_classes; relax.i += 1) {
if (relax.inferred_ge_distribution[relax.i][1] < 1e-5 && relax.shift == 0) {
relax.shift += 1;
continue;
}
relax.ge_guess[relax.i][0] = relax.inferred_ge_distribution[relax.i + relax.shift][0];
relax.ge_guess[relax.i][1] = relax.inferred_ge_distribution[relax.i + relax.shift][1];
}
//console.log (relax.ge_guess);
continue;
}
}
relax.distribution_for_json = {'Shared' : utility.Map (utility.Range (relax.rate_classes, 0, 1),
"_index_",
"{terms.json.omega_ratio : relax.inferred_ge_distribution [_index_][0],
......@@ -251,6 +311,9 @@ if (relax.model_set == "All") { // run all the models
0,
relax.k_estimates);
break;
}
} else {
relax.general_descriptive.fit = relax.final_partitioned_mg_results;
}
......@@ -289,7 +352,6 @@ relax.bound_weights = models.BindGlobalParameters ({"0" : relax.reference.bsrel_
models.BindGlobalParameters ({"0" : relax.test.bsrel_model, "1" : relax.reference.bsrel_model}, terms.nucleotideRate("[ACGT]","[ACGT]"));
parameters.DeclareGlobalWithRanges (relax.relaxation_parameter, 1, 0, 50);
model.generic.AddGlobal (relax.test.bsrel_model, relax.relaxation_parameter, terms.relax.k);
......@@ -356,10 +418,69 @@ io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution wa
relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.test.bsrel_model)) % 0;
selection.io.report_dnds (relax.inferred_distribution);
io.ReportProgressMessageMD("RELAX", "alt", "* The following rate distribution was inferred for **reference** branches");
relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.reference.bsrel_model)) % 0;
selection.io.report_dnds (relax.inferred_distribution_ref);
relax.lf.raw = relax.ComputeOnGrid ( relax.alternative_model.fit[terms.likelihood_function],
relax.grid.MatrixToDict ({200,1}["_MATRIX_ELEMENT_ROW_*0.025"]),
"relax.pass1.evaluator",
"relax.pass1.result_handler");
// FIND the difference between K < 1 and K > 1
relax.best_samples = {{-1e100,-1e100}};
for (relax.k = 0; relax.k < 40; relax.k += 1) {
relax.best_samples[0] = Max (relax.best_samples[0], relax.lf.raw[relax.k]);
}
for (relax.k = 40; relax.k < 200; relax.k += 1) {
relax.best_samples[1] = Max (relax.best_samples[1], relax.lf.raw[relax.k]);
}
if (Abs (relax.best_samples[1] - relax.best_samples[0]) < 5.) { // could be diagnostic of convergence problems
io.ReportProgressMessageMD("RELAX", "alt-2", "* Potential convergence issues due to flat likelihood surfaces; checking to see whether K > 1 or K < 1 is robustly inferred");
if (relax.fitted.K > 1) {
parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.range01);
} else {
parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range1);
}
relax.alternative_model.fit.take2 = estimators.FitLF (relax.filter_names, relax.trees, { "0" : relax.model_map},
relax.alternative_model.fit ,
relax.model_object_map,
{terms.run_options.retain_lf_object: TRUE}
);
if (relax.alternative_model.fit.take2 [terms.fit.log_likelihood] > relax.alternative_model.fit[terms.fit.log_likelihood]) {
io.ReportProgressMessageMD("RELAX", "alt-2", "\n### Potential for highly unreliable K inference due to multiple local maxima in the likelihood function, treat results with caution ");
io.ReportProgressMessageMD("RELAX", "alt-2", "> Relaxation parameter reset to opposite mode of evolution from that obtained in the initial optimization.");
io.ReportProgressMessageMD("RELAX", "alt-2", "* " + selection.io.report_fit (relax.alternative_model.fit.take2, 9, relax.codon_data_info[terms.data.sample_size]));
relax.fitted.K = estimators.GetGlobalMLE (relax.alternative_model.fit.take2,terms.relax.k);
io.ReportProgressMessageMD("RELAX", "alt-2", "* Relaxation/intensification parameter (K) = " + Format(relax.fitted.K,8,2));
io.ReportProgressMessageMD("RELAX", "alt-2", "* The following rate distribution was inferred for **test** branches");
relax.inferred_distribution = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.test.bsrel_model)) % 0;
selection.io.report_dnds (relax.inferred_distribution);
io.ReportProgressMessageMD("RELAX", "alt-2", "* The following rate distribution was inferred for **reference** branches");
relax.inferred_distribution_ref = parameters.GetStickBreakingDistribution (models.codon.BS_REL.ExtractMixtureDistribution (relax.reference.bsrel_model)) % 0;
selection.io.report_dnds (relax.inferred_distribution_ref);
relax.alternative_model.fit = relax.alternative_model.fit.take2;
}
parameters.SetRange (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.relax.k), terms.relax.k_range);
}
relax.distribution_for_json = {relax.test_branches_name : utility.Map (utility.Range (relax.rate_classes, 0, 1),
"_index_",
"{terms.json.omega_ratio : relax.inferred_distribution [_index_][0],
......@@ -459,7 +580,7 @@ if (relax.model_set == "All") {
io.ReportProgressMessageMD ("RELAX", "pe", "Fitting the partitioned descriptive model (separate distributions for *test* and *reference* branches)");
parameters.RemoveConstraint (utility.Keys (relax.bound_weights));
for (relax.i = 1; relax.i < relax.rate_classes; relax.i += 1) {
for (relax.i = 1; relax.i <= relax.rate_classes; relax.i += 1) {
parameters.RemoveConstraint (model.generic.GetGlobalParameter (relax.test.bsrel_model , terms.AddCategory (terms.parameters.omega_ratio,relax.i)));
}
relax.pe.fit = estimators.FitExistingLF (relax.alternative_model.fit[terms.likelihood_function], relax.model_object_map);
......@@ -522,17 +643,31 @@ lfunction relax.extract.k(branch_info) {
lfunction relax.set.k (tree_name, node_name, model_description) {
if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.relax.k"), "String")) {
k = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.relax.k")];
parameters.SetValue (tree_name + "." + node_name + "." + k, 1);
parameters.SetConstraint (tree_name + "." + node_name + "." + k, utility.getGlobalValue ("relax.relaxation_parameter"), "");
parameters.SetRange (tree_name + "." + node_name + "." + k, utility.getGlobalValue ("terms.relax.k_range"));
}
return tree_name + "." + node_name + "." + k;
}
//------------------------------------------------------------------------------
lfunction relax.set.k2 (tree_name, node_name, model_description) {
if (utility.Has (model_description [utility.getGlobalValue ("terms.local")], utility.getGlobalValue ("terms.relax.k"), "String")) {
k = (model_description [utility.getGlobalValue ("terms.local")])[utility.getGlobalValue ("terms.relax.k")];
parameters.RemoveConstraint (tree_name + "." + node_name + "." + k);
}
return tree_name + "." + node_name + "." + k;
}
//------------------------------------------------------------------------------
lfunction relax.init.k (lf_id, components, data_filter, tree, model_map, initial_values, model_objects) {
parameter_set = estimators.TraverseLocalParameters (lf_id, model_objects, "relax.set.k");
parameters.SetConstraint (model.generic.GetGlobalParameter (utility.getGlobalValue("relax.ge.bsrel_model") , terms.AddCategory (utility.getGlobalValue("terms.parameters.omega_ratio"),2)), utility.getGlobalValue("terms.parameters.one"), utility.getGlobalValue("terms.global"));
rc = utility.getGlobalValue ("relax.rate_classes");
/*if (rc > 2) {
parameters.SetConstraint (model.generic.GetGlobalParameter (utility.getGlobalValue("relax.ge.bsrel_model") , terms.AddCategory (utility.getGlobalValue("terms.parameters.omega_ratio"),rc-1)), utility.getGlobalValue("terms.parameters.one"), utility.getGlobalValue("terms.global"));
}*/
/*parameters.SetConstraint (model.generic.GetGlobalParameter (utility.getGlobalValue("relax.ge.bsrel_model") , terms.AddCategory (utility.getGlobalValue("terms.parameters.omega_ratio"),utility.getGlobalValue ("relax.rate_classes"))),
"1/(" +
Join ("*", utility.Map (
......@@ -558,7 +693,17 @@ lfunction relax.BS_REL.ModelDescription (type, code, components) {
lfunction relax.DistributionGuess (mean) {
guess = {{0.05,0.7}{0.25,0.2}{10,0.1}};
rc = utility.getGlobalValue ("relax.rate_classes");
guess = {rc,2};
guess[rc-1][0] = 5;
guess[rc-1][1] = 0.1;
for (k = 0; k < rc - 1; k += 1) {
guess[k][0] = 0.1 ^ (1 / (1 + k));
guess[k][1] = (0.9) / (rc-1) ;
}
norm = + guess[-1][1];
guess_mean = 1/(+(guess [-1][0] $ guess [-1][1]))/norm;
......@@ -637,6 +782,8 @@ lfunction relax.BS_REL._DefineQ (bs_rel, namespace) {
if ( component < bs_rel[utility.getGlobalValue("terms.model.components")]) {
model.generic.AddGlobal ( bs_rel, _aux[component-1], terms.AddCategory (utility.getGlobalValue("terms.mixture.mixture_aux_weight"), component ));
parameters.DeclareGlobalWithRanges (_aux[component-1], 0.5, 0, 1);
} else {
}
models.codon.generic.DefineQMatrix(bs_rel, namespace);
rate_matrices [key] = bs_rel[utility.getGlobalValue("terms.model.rate_matrix")];
......@@ -718,3 +865,15 @@ lfunction relax.select_branches(partition_info) {
return_set + tree_configuration;
return return_set;
}
//------------------------------------------------------------------------------
lfunction relax.grid.MatrixToDict (grid) {
return utility.Map (utility.MatrixToListOfRows (grid), "_value_",
'{ terms.relax.k : {
terms.id : relax.relaxation_parameter,
terms.fit.MLE : _value_[1]
}
}');
}
......@@ -19,6 +19,7 @@ LoadFunctionLibrary("libv3/models/codon/MG_REV.bf");
LoadFunctionLibrary("modules/io_functions.ibf");
LoadFunctionLibrary("modules/selection_lib.ibf");
/*------------------------------------------------------------------------------
Display analysis information
*/
......@@ -111,13 +112,12 @@ slac.table_headers = {{"ES", "Expected synonymous sites"}
slac.table_screen_output = {{"Codon", "Partition", "S", "N", "dS", "dN", "Selection detected?"}};
slac.table_output_options = {terms.table_options.header : TRUE, terms.table_options.minimum_column_width : 16, terms.table_options.align : "center"};
namespace slac {
LoadFunctionLibrary ("modules/shared-load-file.bf");
load_file ("slac");
}
slac.samples = io.PromptUser ("\n>Select the number of samples used to assess ancestral reconstruction uncertainty [select 0 to skip]",100,0,100000,TRUE);
slac.pvalue = io.PromptUser ("\n>Select the p-value threshold to use when testing for selection",0.1,0,1,FALSE);
......@@ -132,6 +132,8 @@ selection.io.startTimer (slac.json [terms.json.timers], "Model fitting",1 );
namespace slac {
doGTR ("slac");
}
estimators.fixSubsetOfEstimates(slac.gtr_results, slac.gtr_results[terms.global]);
namespace slac {
......
RequireVersion ("2.3.12");
LoadFunctionLibrary ("libv3/all-terms.bf");
LoadFunctionLibrary ("libv3/UtilityFunctions.bf");
LoadFunctionLibrary ("libv3/IOFunctions.bf");
LoadFunctionLibrary ("libv3/tasks/estimators.bf");
LoadFunctionLibrary ("libv3/tasks/alignments.bf");
LoadFunctionLibrary ("libv3/tasks/ancestral.bf");
LoadFunctionLibrary ("libv3/tasks/trees.bf");
LoadFunctionLibrary ("../modules/io_functions.ibf");
LoadFunctionLibrary ("../modules/selection_lib.ibf");
LoadFunctionLibrary ("libv3/convenience/math.bf");
LoadFunctionLibrary ("libv3/models/protein.bf");
LoadFunctionLibrary ("libv3/models/protein/empirical.bf");
LoadFunctionLibrary ("libv3/models/protein/REV.bf");
LoadFunctionLibrary ("libv3/tasks/mpi.bf");
LoadFunctionLibrary ("libv3/stats.bf");
LoadFunctionLibrary ("libv3/convenience/random.bf");
utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", TRUE);
utility.SetEnvVariable ("ACCEPT_ROOTED_TREES", TRUE);
namespace terms.fade {
mode = "mode";
regimes = "regimes";
bias = "substitution bias";
rate = "rate multiplier";
generator = "generator";
};
fade.parameter.bias = "FADE.bias";
fade.parameter.rate = "FADE.rate";
fade.tree.name = "FADE.simulated_tree";
fade.settings = {};
fade.alphabet = "ACDEFGHIKLMNPQRSTVWY";
fade.alphabet.matrix = {1,20};
fade.simulation.matrix = {2,20};
fade.evolutionary_modes = {"Null" : "Evolution under baseline model"};
for (r = 0; r < Abs (fade.alphabet); r += 1) {
fade.evolutionary_modes [fade.alphabet[r]] = "Directional evolution towards `fade.alphabet[r]`";
fade.alphabet.matrix [r] = fade.alphabet[r];
fade.simulation.matrix[0][r] = fade.alphabet[r];
}
fade.simulation.matrix[1][0] = "1";
fade.analysis_description = {terms.io.info :
"A companion data simulator for FADE",
terms.io.version : "0.1",
terms.io.reference : "TBD",
terms.io.authors : "Sergei L Kosakovsky Pond",
terms.io.contact : "spond@temple.edu",
terms.io.requirements : "A **rooted** phylogenetic tree with branch lengths (optionally annotated with {} to define a branch partition set)"
};
io.DisplayAnalysisBanner (fade.analysis_description);
// =========== LOAD DATA AND SET UP CACHES
SetDialogPrompt ("Specify a rooted tree to use for data simulations");
fade.baseline.tree = trees.LoadAnnotatedTopology (TRUE);
assert (trees.HasBranchLengths(fade.baseline.tree), "Input tree MUST have branch lengths");
assert (fade.baseline.tree[terms.trees.rooted], "Input tree MUST be rooted");
fade.replicates = io.PromptUser ("How many replicate datasets be simulated", 100, 1, 10000, true);
fade.sites_class_count = io.PromptUser ("How many types of sites will be simulated", 2, 1, 10000, true);
fade.site_classes = {};
for (k = 0; k < fade.sites_class_count; k += 1) {
this_class = {
terms.data.sites : io.PromptUser ("How many sites are in class " + (k+1), 100, 1, 10000, true),
terms.fade.mode : io.SelectAnOption (fade.evolutionary_modes, "Evolutionary regime for site class " + (k+1)),
fade.parameter.rate : io.PromptUser ("Relative overall rate for site class " + (k+1) + " (1 = average)", 1, 0, 1000, false)
};
if (this_class [terms.fade.mode] != "Null") {
this_class [fade.parameter.bias] = io.PromptUser ("Substitution bias for site class " + (k+1) + " (0 = no bias)", 1, 0, 1000, false);
}
fade.site_classes [k] = this_class;
}
fade.selected_branches = (selection.io.defineBranchSets ( {"0" : { terms.data.tree : fade.baseline.tree}} ))[0];
fade.settings [terms.data.tree] = fade.baseline.tree[terms.trees.newick_with_lengths];
fade.settings [terms.json.tested] = fade.selected_branches;
fade.settings [terms.fade.regimes] = fade.site_classes;
fade.settings [terms.replicates] = fade.replicates;
utility.Extend (models.protein.empirical_models, {"GTR" : "General time reversible model (189 estimated parameters)."});
fade.baseline_model = io.SelectAnOption (models.protein.empirical_models, "Baseline substitution model");
fade.generator = (utility.Extend (models.protein.empirical.plusF_generators , {"GTR" : "models.protein.REV.ModelDescription"}))[fade.baseline_model ];
fade.branch_lengths = parameters.helper.tree_lengths_to_initial_values ({"0" : fade.baseline.tree}, None);
fade.settings [terms.model] = fade.baseline_model;
fade.settings [terms.replicates] = fade.replicates;
lfunction fade.rate.modifier (fromChar, toChar, namespace, model_type, model) {
baseline = Call (^"fade.baseline_model.rate", fromChar,toChar, namespace, model_type, model);
utility.EnsureKey (baseline, model_type);
selection.io.json_store_key_value_pair (baseline, model_type, utility.getGlobalValue("terms.fade.bias"), utility.getGlobalValue("fade.parameter.bias"));
selection.io.json_store_key_value_pair (baseline, model_type, utility.getGlobalValue("terms.fade.rate"), utility.getGlobalValue("fade.parameter.rate"));
baseline [utility.getGlobalValue("terms.model.rate_entry")] = parameters.AppendMultiplicativeTerm (baseline [utility.getGlobalValue("terms.model.rate_entry")], utility.getGlobalValue("fade.parameter.rate"));
if ( Type (model["fade.residue_bias"]) == "String") {
if (toChar == model["fade.residue_bias"]) {
baseline [utility.getGlobalValue("terms.model.rate_entry")] =
parameters.AppendMultiplicativeTerm ( baseline [utility.getGlobalValue("terms.model.rate_entry")],
"`utility.getGlobalValue("fade.parameter.bias")`/(1-Exp (-`utility.getGlobalValue("fade.parameter.bias")`))");
} else {
if (fromChar == model["fade.residue_bias"]) {
parameters.AppendMultiplicativeTerm ( baseline [utility.getGlobalValue("terms.model.rate_entry")],
"`utility.getGlobalValue("fade.parameter.bias")`/(Exp (`utility.getGlobalValue("fade.parameter.bias")`-1))");
}
}
}
return baseline;
}
lfunction fade.biased.model.generator (type, residue) {
model = Call (^"fade.generator", type);
utility.setGlobalValue("fade.baseline_model.rate", model[utility.getGlobalValue ("terms.model.q_ij")]);
model[utility.getGlobalValue ("terms.model.q_ij")] = "fade.rate.modifier";
model["fade.residue_bias"] = residue;
model[utility.getGlobalValue ("terms.alphabet")] = utility.getGlobalValue ("fade.alphabet.matrix");
return model;
}
fade.model.baseline = model.generic.DefineModel("fade.biased.model.generator",
"fade.baseline_model", {
"0": "terms.global",
"1": None
},
None,
"frequencies.equal");
fade.settings [terms.fade.generator] = fade.model.baseline;
fade.model_assignment_with_bias = {
"fade.baseline_model" : utility.Filter (fade.selected_branches, "_value_", "_value_ == terms.tree_attributes.background"),
"fade.biased_model" : utility.Filter (fade.selected_branches, "_value_", "_value_ == terms.tree_attributes.test"),
};
fade.model_assignment_without_bias = {
"fade.baseline_model" : utility.Filter (fade.selected_branches, "_value_", "TRUE"),
};
parameters.DeclareGlobalWithRanges (fade.parameter.rate, 1, 0, 100);
parameters.DeclareGlobalWithRanges (fade.parameter.bias, 1e-10, 1e-10, 100);
fade.replicate_data = {};
fade.simulation_path = io.PromptUserForString ("Save simulation settings to (all simulation files will be saved to the same location with .N.fas extensions)");
fprintf (fade.simulation_path, CLEAR_FILE, fade.settings);
fade.sim_frequencies = fade.model.baseline[terms.efv_estimate];
for (fade.block_id = 0; fade.block_id < Abs (fade.site_classes); fade.block_id += 1) {
io.ReportProgressBar ("simulation", "Generating data for selection regime " + (fade.block_id+1) );
fade.this_block = fade.site_classes[fade.block_id];
if (utility.Has ( fade.this_block, fade.parameter.bias, "Number")) {
fade.bias.residue = fade.this_block[terms.fade.mode];
fade.model.biased = model.generic.DefineModel("fade.biased.model.generator",
"fade.biased_model", {
"0": "terms.global",
"1": parameters.Quote (fade.bias.residue)
},
None,
"frequencies.equal");
fade.model_id_to_object = {
"fade.biased_model": fade.model.biased,
"fade.baseline_model": fade.model.baseline
};
parameters.SetValue (fade.parameter.bias, fade.this_block[fade.parameter.bias]);
fade.model_assignment = fade.model_assignment_with_bias;
} else {
fade.model_assignment = fade.model_assignment_without_bias;
fade.model_id_to_object = {
"fade.baseline_model": fade.model.baseline
};
}
parameters.SetValue (fade.parameter.bias, 1e-10);
parameters.SetValue (fade.parameter.rate, 1);
model.ApplyModelToTree(fade.tree.name, fade.baseline.tree, None, fade.model_assignment);
estimators.ApplyExistingEstimatesToTree (fade.tree.name, fade.model_id_to_object, (fade.branch_lengths[terms.branch_length])[0], None, {});
parameters.SetValue (fade.parameter.rate, fade.this_block[fade.parameter.rate]);
parameters.SetValue (fade.parameter.bias, fade.this_block[fade.parameter.bias]);
for (fade.replicate_id = 0; fade.replicate_id < fade.replicates; fade.replicate_id += 1) {
DataSet simulated_block = Simulate (^fade.tree.name, fade.sim_frequencies , fade.simulation.matrix, fade.this_block[terms.data.sites]);
fade.data_block = alignments.GetAllSequences ("simulated_block");
io.ReportProgressBar ("simulation", "Generating data for selection regime " + (fade.block_id+1) + " replicate " + fade.replicate_id + " / " + fade.replicates);
if (fade.block_id == 0) {
fade.replicate_data [fade.replicate_id] = fade.data_block;
} else {
utility.ForEachPair (fade.data_block, "_id_", "_seq_",
'
(fade.replicate_data[fade.replicate_id])[_id_] += _seq_;
');
}
}
}
io.ClearProgressBar ();
for (fade.replicate_id = 0; fade.replicate_id < fade.replicates; fade.replicate_id += 1) {
fade.current_file = fade.simulation_path + "." + (fade.replicate_id+1) + ".fas";
console.log (fade.current_file);
fprintf (fade.current_file, CLEAR_FILE, KEEP_OPEN);
utility.ForEachPair ((fade.replicate_data[fade.replicate_id]), "_id_", "_seq_",
'
fprintf (fade.current_file, ">", _id_, "\n", _seq_, "\n");
');
fprintf (fade.current_file, "\n", fade.settings [terms.data.tree], ";", CLOSE_FILE);
}
This diff is collapsed.
//------------------------------------------------------------------------------
lfunction ComputeOnGrid (lf_id, grid, handler, callback) {
jobs = mpi.PartitionIntoBlocks(grid);
scores = {};
......@@ -38,9 +39,11 @@ lfunction pass1.evaluator (lf_id, tasks, scores) {
task_ids = utility.Keys (tasks);
task_count = Abs (tasks);
for (i = 0; i < task_count; i+=1) {
parameters.SetValues (tasks[task_ids[i]]);
LFCompute (^lf_id, ll);
results [task_ids[i]] = ll;
}
LFCompute (^lf_id, LF_DONE_COMPUTE);
......