Skip to content
Commits on Source (3)
* 3.2.0
- Fixes
- Increase test timeout for internal testsuite and testiphy.
- HTML report: don't hide header behind top-bar in Chrome.
- alignment-smc improvements.
* 3.1.5
- Fixes
- Make all programs use shipped libstdc++
- Make MDS plots handle bp-analyze --subsample
- Increase test timeouts
- Correctly write initial alignment for fixed alignment partitions.
- Don't write "file:" for MDS URLs in HTML report
- Print citations with pmid and pmcid in help.
- Add Frequencies.uniform[] function.
- Print help for 0-argument functions like 'dna'
- Make SEV handle site-compression.
- HTML report: print version number + lots of cosmetic improvements.
- Add new tool tree-tool (and map page, etc.)
- Change alignments-diff highlight color back to red.
- Clean up DP matrix code.
* 3.1.4
- Fix prior on alpha in Rates.gamma
* 3.1.3
- Fix mean_length prior in RS05 model.
- Add some more color-schemes for drawing alignments-diff output.
- Add ferns exon/intro data set.
* 3.1.2
- Fix a testsuite bug
* 3.1.1
- Print priors on models in their own section.
bali-phy (3.2+dfsg-1) UNRELEASED; urgency=medium
* New upstream version
-- Benjamin Redelings <benjamin.redelings@gmail.com> Mon, 25 Jun 2018 21:39:12 -0400
bali-phy (3.1.5+dfsg-1) unstable; urgency=medium
* New upstream version
......
This diff is collapsed.
<!DOCTYPE book [
<!ENTITY version "3.1.4" >
<!ENTITY version "3.2" >
<!ENTITY source.file "bali-phy-&version;.tar.gz" >
<!ENTITY linux64.file "bali-phy-&version;-linux64.tar.gz" >
......@@ -848,6 +848,13 @@ scale = 1,3:</programlisting>
</listitem>
</varlistentry>
<varlistentry>
<term>C1.run.json</term>
<listitem>
<para>JSON file containing information about the command line, models, hostname, start time, etc.</para>
</listitem>
</varlistentry>
</variablelist>
<para>For the last two files, each line in these files corresponds to one iteration.</para>
......
No preview for this file type
This diff is collapsed.
module AirLine where {
module CoalMine where {
import Distributions;
fatalities = [4, 5, 4, 1, 0, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2, 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 3, 3, 0, 0, 0, 1, 4, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1];
......
{
"name": "rs07_relaxed_rates",
"result_type": "IndelModel",
"generate_function": false,
"call": "IModel.rs07_relaxed_rates_model",
"args": "",
"extract": "all"
......
project('bali-phy', ['cpp','c'],
version: '3.1.5',
version: '3.2',
default_options : [
'buildtype=release',
'cpp_std=c++14'
......@@ -135,7 +135,7 @@ run_tests = find_program(join_paths(meson.source_root(),'tests/run-tests.py'))
test('bali-phy testsuite',
run_tests,
timeout: 600,
timeout: 3000,
workdir: join_paths(meson.source_root(),'tests'),
args:[baliphy.full_path(), packagepath])
......@@ -146,7 +146,7 @@ testiphy = find_program(join_paths(meson.source_root(),'testiphy/testiphy'), req
if testiphy.found()
test('testiphy (likelihood testsuite)',
testiphy,
timeout: 600,
timeout: 3000,
workdir: join_paths(meson.source_root(),'testiphy'),
args:[baliphy.full_path(), packagepath])
endif
......@@ -419,7 +419,13 @@ sub html_header
.content {margin:1em; margin-top: 3em}
.backlit td {background: rgb(220,220,220);}
.anchor {position: relative; top: -3em}
:target:before {
content:"";
display:block;
height:2em; /* fixed header height*/
margin:-2em 0 0; /* negative fixed header height */
}
h1 {font-size: 150%;}
h2 {font-size: 130%; margin-top:2.5em; margin-bottom: 1em}
......
#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
#undef NDEBUG
#include <vector>
#include <valarray>
#include <string>
......
......@@ -30,7 +30,7 @@ double quantile(double eta, double p)
vector<double> get_bin_boundaries(int n, const vector<double>& coalescent_rates, const vector<double>& level_boundaries)
{
assert(coalescent_rates.size() + 1 == level_boundaries.size());
assert(coalescent_rates.size() == level_boundaries.size());
assert(level_boundaries[0] == 0.0);
vector<double> b(n+1);
......@@ -58,13 +58,14 @@ vector<double> get_bin_boundaries(int n, const vector<double>& coalescent_rates,
assert(level < level_boundaries.size());
double delta_t = quantile(coalescent_rates[level], 1.0 - q2);
if (t2 + delta_t < level_boundaries[level+1])
if (t2 + delta_t < level_boundaries[level] or level == level_boundaries.size()-1)
{
b[i] = t2 + delta_t;
break;
}
else
{
assert(level + 1 < level_boundaries.size());
t2 = level_boundaries[level+1];
delta_t = level_boundaries[level+1] - level_boundaries[level];
assert(delta_t >= 0);
......@@ -397,11 +398,13 @@ constexpr double scale_min = 1.0/scale_factor;
constexpr double log_scale_min = -177.445678223345999210811423093293201427328034396225345054e0;
enum class site_t {unknown=0,poly,mono,missing};
enum class site_t {unknown=0,poly,mono,missing,empty};
inline site_t classify_site(int x1, int x2)
{
if (x1 < 0 or x2< 0)
if (x1 == -1 and x2 == -1)
return site_t::empty;
else if (x1 < 0 or x2< 0)
return site_t::missing;
else if (x1 == x2)
return site_t::mono;
......@@ -487,6 +490,11 @@ vector<pair<int,site_t>> classify_sites(const alignment& A)
for(int l=1; l < A.length();)
{
site_t s = classify_site(A(l,0), A(l,1));
if (s == site_t::empty)
{
l++;
continue;
}
int count = 0;
do
......@@ -518,7 +526,9 @@ void smc_group(vector<double>& L, vector<double>& L2, int& scale, const vector<E
double temp = 0;
for(int j=0;j<n_bins; j++)
temp += L[j] * M(j,k);
L2[k] = temp;
assert(temp > -1.0e-9);
L2[k] = std::max(temp, 0.0);
}
i += taking;
......@@ -532,7 +542,7 @@ void smc_group(vector<double>& L, vector<double>& L2, int& scale, const vector<E
log_double_t smc(double rho_over_theta, vector<double> coalescent_rates, vector<double> level_boundaries, const alignment& A)
{
assert(level_boundaries.size() >= 2);
assert(level_boundaries.size() >= 1);
assert(level_boundaries[0] == 0.0);
assert(rho_over_theta >= 0);
......@@ -564,8 +574,13 @@ log_double_t smc(double rho_over_theta, vector<double> coalescent_rates, vector<
int scale = 0;
// FIXME: I think we should be able to start at site -1 with L[i] = pi[i], if pi[i] is the equilibrium of T(j,k)
// Its not exactly the equilibrium, so this is approximate, I guess.
for(int i=0;i< n_bins; i++)
L[i] = pi[i] * emission_probabilities[i](A(0,0), A(0,1));
{
L[i] = pi[i];
// if (A(0,0) >= 0 and A(0,1) >= 0)
// L[i] *= emission_probabilities[i](A(0,0), A(0,1));
}
// # Iteratively compute likelihoods for remaining columns
const auto transition = get_transition_probabilities(bin_boundaries, bin_times, theta, rho);
......@@ -610,7 +625,6 @@ extern "C" closure builtin_function_smc_density(OperationArgs& Args)
level_boundaries.push_back(0);
for(auto& l: level_boundaries_)
level_boundaries.push_back(l.as_double());
level_boundaries.push_back(100000);
auto a = Args.evaluate(3);
auto& A = a.as_<alignment>();
......
#undef NDEBUG
/*
Copyright (C) 2004-2007,2009-2014 Benjamin Redelings
......
......@@ -167,11 +167,6 @@ ptree convert_rule(const Rules& R, Rule rule)
{
(*alphabet) = parse(R, alphabet->get_value<string>());
}
if (auto applied_args= x.get_child_optional("applied_args"))
{
(*applied_args) = parse_type(applied_args->get_value<string>());
}
}
return rule;
......
......@@ -287,11 +287,6 @@ expression_ref arg_to_apply(const ptree& expression)
return E;
}
expression_ref apply_args(expression_ref action, const ptree& applied_args)
{
return {action, arg_to_apply(applied_args)};
}
optional<vector<double>> get_frequencies_from_tree(const ptree& model_rep, const alphabet& a)
{
vector<double> pi;
......@@ -442,7 +437,6 @@ expression_ref get_model_as(const Rules& R, const ptree& model_rep, const map<st
if (not rule->count("call")) throw myexception()<<"No call for '"<<name<<"'";
// 6a. Extract parts of the rule
bool generate_function = rule->get("generate_function",true);
bool perform_function = rule->get("perform",false);
ptree call = rule->get_child("call");
ptree args = rule->get_child("args");
......@@ -451,20 +445,6 @@ expression_ref get_model_as(const Rules& R, const ptree& model_rep, const map<st
throw myexception()<<"For rule '"<<name<<"', function '"<<call.get_value<string>()<<"' must be a qualified symbol or a builtin constructor like '(,)', but it is neither!";
expression_ref E = dummy(call.get_value<string>());
// This means (i) don't perform the arguments first and (ii) don't add "return" to the result.
if (not generate_function)
{
for(int i=0;i<call.size();i++)
{
string arg_name = array_index(call,i).get_value<string>();
// check that arg_name is a valid argument
get_arg(*rule, arg_name);
expression_ref arg = get_model_as(R, model_rep.get_child(arg_name), scope);
E = {E,arg};
}
return E;
}
// 6b. Apply the arguments listed in the call : 'f call.name1 call.name2 call.name3'
// There could be fewer of these than the rule arguments.
for(int i=0;i<call.size();i++)
......@@ -489,11 +469,6 @@ expression_ref get_model_as(const Rules& R, const ptree& model_rep, const map<st
string arg_name = argi.get_child("arg_name").get_value<string>();
expression_ref arg = get_model_as(R, model_rep.get_child(arg_name), extend_scope(*rule, i, scope));
// Apply arguments if necessary
auto applied_args = argi.get_child_optional("applied_args");
if (applied_args)
arg = apply_args(arg, *applied_args);
auto log_name = name + ":" + arg_name;
// Prefix "arg_name" (arg_+arg_name)
arg = {Prefix, log_name, arg};
......
......@@ -34,10 +34,6 @@ along with BAli-Phy; see the file COPYING. If not see
#include "distance-methods.H"
#include "rng.H"
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <boost/program_options.hpp>
#include <boost/shared_ptr.hpp>
#include "io.H"
......
......@@ -56,8 +56,6 @@ void do_setup(const variables_map& args,vector<alignment>& alignments)
throw myexception()<<"Alignment sample is empty.";
}
#undef NDEBUG
unsigned n_with_identity(const alignment& A,int s1,int s2,double I)
{
// Get matches
......
......@@ -278,8 +278,10 @@ bool is_masked_column(const alignment& A, int c)
return false;
return true;
}
bool is_variant_column(const alignment& A, int c)
{
assert(0 <= c and c < A.length());
int i=0;
int l0 = -1;
for(;i<A.n_sequences() and l0 < 0;i++)
......@@ -291,6 +293,15 @@ bool is_variant_column(const alignment& A, int c)
return false;
}
int count_variant_columns(const alignment& A, int c1, int c2)
{
int count = 0;
for(int c=c1;c<=c2 and c<A.length();c++)
if (is_variant_column(A,c))
count++;
return count;
}
void remove_columns(alignment& A, const std::function<bool(int)>& remove)
{
int j=0;
......@@ -575,31 +586,60 @@ void write_histogram(std::ostream& o, int blocksize, const alignment& A)
}
// The counts should be Poisson distributed. It will be relatively rate to get
// The counts should be Poisson distributed. It will be relatively rare to get
// counts more than 10 times higher than the mean
// FIXME - this does depend on the window boundaries.
// Change so that we use a sliding window.
dynamic_bitset<> block_mask(dynamic_bitset<> mask, int blocksize)
{
auto mask2 = mask;
for(int i=1; i<=blocksize; i++)
{
mask2 >>= 1;
mask |= mask2;
}
return mask;
}
int autoclean(alignment& A)
{
constexpr double mean = 2.0;
constexpr double mean_snps_per_block = 2.0;
constexpr double factor = 10.0;
int L = A.length();
int n_sites = A.length();
int S = n_snps(A);
if (S < 100) throw myexception()<<"Refusing to autoclean chromosome with < 100 variant sites.";
int masked = 0;
int blocksize = int(L*(mean/S)+0.5);
auto counts = snps_in_blocks(A, blocksize);
int blocksize = int(n_sites*(mean_snps_per_block/S)+0.5);
for(int i=0;i<counts.size();i++)
if (counts[i] >= mean*factor)
dynamic_bitset<> mask(n_sites);
int count = 0;
for(int c2=0; c2<n_sites; c2++)
{
int c1 = c2 - blocksize;
if ( is_variant_column(A,c2)) count++;
if (c1 >= 0 and is_variant_column(A,c1)) count--;
assert(count == count_variant_columns(A, std::max(c1+1,0), c2));
if (count >= mean_snps_per_block*factor)
{
mask.set(c2);
masked++;
for(int j=i*blocksize;j<A.length() and j<(i+1)*blocksize;j++)
mask_column(A, j);
}
}
auto mask2 = block_mask(mask, blocksize);
for(int i=0;i<n_sites;i++)
{
if (mask2[i])
{
mask_column(A, i);
}
}
// std::cerr<<"Masked "<<masked<<" sites\n";
return masked;
}
......@@ -643,6 +683,8 @@ int main(int argc,char* argv[])
auto A = A0;
const alphabet& a = A.get_alphabet();
if (args.count("autoclean"))
autoclean(A);
if (args.count("mask-file"))
{
......@@ -653,15 +695,6 @@ int main(int argc,char* argv[])
}
}
if (args.count("autoclean"))
autoclean(A);
if (args.count("histogram"))
{
write_histogram(std::cout, args["histogram"].as<int>(), A);
exit(0);
}
if (args.count("mask-gaps"))
{
// 1. label gap columns with 2 to remove them.
......@@ -678,6 +711,12 @@ int main(int argc,char* argv[])
remove_and_mask_columns(A, [&gaps](int c){return gaps[c];});
}
if (args.count("histogram"))
{
write_histogram(std::cout, args["histogram"].as<int>(), A);
exit(0);
}
//----- Count informative/non-constant sites ----//
dynamic_bitset<> informative(A.length());
dynamic_bitset<> informative2(A.length());
......