Skip to content
Commits on Source (4)
......@@ -4,25 +4,19 @@ environment:
matrix:
- MSYSTEM: MINGW64
PATH: C:\msys64\usr\bin;C:\msys64\mingw64\bin;C:\Windows\System32;C:\Windows;%PATH%
- MSYSTEM: MINGW32
PATH: C:\msys64\usr\bin;C:\msys64\mingw32\bin;C:\Windows\System32;C:\Windows;%PATH%
install:
# update mysy2
- C:\msys64\usr\bin\bash -lc "pacman --needed --noconfirm -Sy pacman-mirrors"
- C:\msys64\usr\bin\bash -lc "pacman --noconfirm -Sy"
- C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S autoconf automake bison flex"
- C:\msys64\usr\bin\bash -lc "pacman --noconfirm -S git"
before_build:
- bash -lc "cd $APPVEYOR_BUILD_FOLDER && mkdir build"
- bash -lc "cd $APPVEYOR_BUILD_FOLDER && ./bootstrap.sh"
# update msys2
- bash -lc "pacman --needed --noconfirm -Sy pacman-mirrors"
- bash -lc "pacman --noconfirm -Sy"
- bash -lc "pacman --noconfirm -S mingw-w64-x86_64-ninja"
- bash -lc "pacman --noconfirm -S mingw-w64-x86_64-python3-pip"
- bash -lc "pip3 install meson"
build_script:
- bash -lc "cd $APPVEYOR_BUILD_FOLDER/build && ../configure --prefix=C:/bali-phy"
- bash -lc "cd $APPVEYOR_BUILD_FOLDER/build && make -j3 install"
- bash -lc "cd C:/ && tar -zcf bali-phy.tgz bali-phy/"
- bash -lc "cd $APPVEYOR_BUILD_FOLDER/ && git submodule update --init"
- bash -lc "cd $APPVEYOR_BUILD_FOLDER/ && meson.py build --prefix=$APPVEYOR_BUILD_FOLDER/local"
- bash -lc "cd $APPVEYOR_BUILD_FOLDER/build && ninja install"
# - bash -lc "cd $APPVEYOR_BUILD_FOLDER/build && ninja test"
test_script:
- bash -lc "cd $APPVEYOR_BUILD_FOLDER/build && make install"
# - bash -lc "cd $APPVEYOR_BUILD_FOLDER/build && make check"
Makefile.in
config.guess*
config.sub*
config.h.in
aclocal.m4
compile
configure
depcomp
install-sh
missing
autom4te.cache
*~
doc/html
doc/latex
\#*#
ltmain.sh
m4/*.m4
*.orig
sudo: required
cache: apt
# addons:
# apt:
# sources:
# - ubuntu-toolchain-r-test
# packages:
# - g++-5
# - g++-6
# - g++-7
# env:
# matrix:
# - CXX=g++-5
# - CXX=g++-6
# - CXX=g++-7
language: cpp
matrix:
include:
- os: linux
dist: trusty
compiler: g++
env: CXX_COMPILER=g++-5
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-5
- pandoc
- os: linux
dist: trusty
env: CXX_COMPILER=g++-6
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-6
- pandoc
- os: linux
dist: trusty
env: CXX_COMPILER=g++-7
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-7
- pandoc
- os: linux
dist: trusty
compiler: clang++
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-7
- pandoc
- os: osx
osx_image: xcode9.2
- os: osx
osx_image: xcode8.3
- os: osx
osx_image: xcode7.3
- os: osx
osx_image: xcode6.4
language: C++
before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test;
sudo apt-get update -y -qq;
sudo apt-get install -y g++-5;
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 90;
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 90;
fi
install:
- sh bootstrap.sh
- mkdir bali-phy-build
# We need to re-export CC and CXX here, because travis exports CXX=g++ or clang++ AFTER we set CXX.
- if [ -n "${C_COMPILER}" ]; then export CC="${C_COMPILER}"; fi
- if [ -n "${CXX_COMPILER}" ]; then export CXX="${CXX_COMPILER}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install meson; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip && unzip -q ninja-linux.zip -d ~/bin && pyenv local 3.6 && pip3 install meson; fi
script:
- cd bali-phy-build
- ../configure --prefix=$HOME/local
- make -j3 install
- make check
- meson build --prefix=$HOME/bali-phy
- ninja -C build test
- ninja -C build install
env:
global:
- secure: ZWEqJt9rWuQQ0uJYx1ZDKASbe1lNOq8dLxfGzRq6ItWZ7v42iFik7WQzQNRcHskfkLnpX48+3iGz8+M6QFIY+y9KKk3hgsR+zEW/kZSlBxA+VSIOP2lV3Z5zBlnO3m0WoLTrDABBfHRFGqjU2JcM2tXLc6TyislMV5Y20edJgNWLNajGdei7+7A48UBSPq1hIA0fU2LZAScdy2TxcbpL8x/aAmE5T1IRBewjXK1x4CARmLJYXZfJDeCUpif2VoApEGwCOqXQtTKrBpJbUQ+sYi+JHCGh3oDFoHmuFJeEM7HrUOJBOI8k5A7wRq/+JESGIa2UObdHu1GZDVxj22YRRSSzQTEy9QyTnFTdYbkkcG83wwZ/57EOAhKA1TqwEDj8Hhux/oQAkR/S27iQ2vCvUtx5wlzX4L2KhinHhVVW7EPs6NQLc8FiRHjAzSaEmLph8UXlSxa+2Yx/+wyQXH3SmJi4r470r5F/orVhZBkw2k7CPhg7YfBPFDODkg+QMIk60hiVE6n/ToeTOCTxti0Lfajq6pzl+KFs/pungbnwZqx8neVGkpE0Z5Lhgr1KV/W17/AxKC62+BIGTRSQXpCxignVXbWqvzLCGCgSxiJ7Y+uphS1UV5lz1EAPTqnPkcxe+1MV64vAe8zMvug4Mb2DsV9QjIvjot1X+FlyzHWhMpc=
......
# Contributing to BAli-Phy
We are excited to see what you will contribute!
## Patches
Patches should be sent through a github PR ("pull request"). This causes the CI tests to run for the patch.
See the [Developer's Guide](http://www.bali-phy.org/developer.html) for information on how to modify bali-phy.
\ No newline at end of file
[![Build Status](https://www.travis-ci.org/bredelings/BAli-Phy.svg?branch=master
)](https://www.travis-ci.org/bredelings/BAli-Phy)
[![Appveyor](https://ci.appveyor.com/api/projects/status/q68hnnoelqqvwsy2?svg=true)](https://ci.appveyor.com/project/bredelings/bali-phy)
Install
-------
......@@ -14,6 +15,7 @@ Documentation
* [http://bali-phy.org/](http://bali-phy.org/)
* [Manual](http://bali-phy.org/README.xhtml)
* [Tutorial](http://bali-phy.org/Tutorial3.html)
* [Developer's Guide](http://bali-phy.org/developer.html)
The Manual describes [how to install](http://bali-phy.org/README.xhtml#installation) bali-phy in detail. Simplified instructions are below.
......@@ -32,12 +34,9 @@ If you build with meson and ninja, you need
* python3
* ninja
If you build with autotools, you need
* autoconf
* automake
* libtool
You need meson version >= 0.45 to build bali-phy.
Build with meson (fastest)
Build with meson
----------------
```bash
sudo apt-get install g++ libcairo2-dev ninja-build python3
......@@ -51,32 +50,11 @@ python3 -m venv meson
source meson/bin/activate
pip3 install meson
meson build --prefix=$HOME/Applications/bali-phy # Two warnings about 'export_dynamic' are OK.
cd build
ninja install
meson build --prefix=$HOME/Applications/bali-phy
ninja -C build install
ninja -C build test
```
Build with autotools (slower)
-------------------
```bash
sudo apt-get install g++ libcairo2-dev autoconf automake libtool
git clone https://github.com/bredelings/BAli-Phy.git
cd BAli-Phy/
git submodule update --init
./bootstrap.sh
mkdir build
cd build
../configure --prefix=$HOME/Applications/bali-phy/
make
make check
make install
```
Additional options to `configure` can be revealed by supplying the `--help` flag.
Adding bali-phy to your `$PATH`
------------------------------
......
bali-phy (3.0.3+dfsg-2) UNRELEASED; urgency=medium
bali-phy (3.1+dfsg-1) UNRELEASED; urgency=medium
[ Dylan Aïssi ]
* Team upload.
* Add references to registries.
-- Dylan Aïssi <bob.dybian@gmail.com> Wed, 25 Apr 2018 22:59:52 +0200
[ Benjamin Redelings ]
* Stop excluding files that have been removed upstream.
* New upstream version
-- Benjamin Redelings <benjamin.redelings@gmail.com> Wed, 02 May 2018 17:47:42 -0400
bali-phy (3.0.3+dfsg-1) unstable; urgency=medium
......
......@@ -2,11 +2,6 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: bali-phy
Source: http://www.bali-phy.com
Files-Excluded: */external
*/m4
*/configure.ac
*/bootstrap.sh
*/git_version.sh
*/Makefile.am
*/dlfcn-win32
Files: *
......
This diff is collapsed.
This diff is collapsed.
No preview for this file type
Source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -10,7 +10,7 @@
<article xmlns="http://docbook.org/ns/docbook" version="5.0"
xmlns:mml="http://www.w3.org/1998/Math/MathML"
xml:lang="en">
<info><title><application>BAli-Phy</application> Tutorial (for version 3.0-beta)</title>
<info><title><application>BAli-Phy</application> Tutorial (for version 3.1)</title>
<author><personname><firstname>Benjamin</firstname><surname>Redelings</surname></personname></author>
</info>
......@@ -34,7 +34,7 @@ Extract the compressed archive:
% tar -zxf examples.tgz
Take a look inside the <filename>examples</filename> directory:
% ls examples
Take a look at an input file:
Take a look at an input file (you can press 'q' to exit 'less'):
% less examples/5S-rRNA/5d.fasta
Get some information about the alignment:
% alignment-info examples/5S-rRNA/5d.fasta
......@@ -59,9 +59,13 @@ You can also show help for advanced options:
You can get help on the command line options:
% bali-phy --help=iterations | less
You can also get help on models, distributions, and functions:
% bali-phy --help=TN | less
% bali-phy --help=Normal | less
% bali-phy --help=tn93 | less
% bali-phy --help=normal | less
% bali-phy --help=quantile
You can also get help on a topic using a less verbose syntax:
% bali-phy help tn93
% bali-phy help normal
% bali-phy help quantile
</para>
</section>
......@@ -100,10 +104,10 @@ The program Tracer graphically displays the posterior probability distribution f
How does the evolutionary process for these genes differ in:
<orderedlist>
<listitem>substitution rate? (Scale&lt;p&gt;)</listitem>
<listitem>insertion-deletion rate? (I&lt;p&gt;.logLambda)</listitem>
<listitem>nucleotide frequencies? (S&lt;p&gt;.TN.F.pi.{A,T,G,C})</listitem>
<listitem>number of indels? (#indels)</listitem>
<listitem>substitution rate? (<userinput>Scale[1]</userinput>, <userinput>Scale[2]</userinput>, ...)</listitem>
<listitem>insertion-deletion rate? (<userinput>I1/rs07:log_rate</userinput>, <userinput>I2/rs07:log_rate</userinput>, ...)</listitem>
<listitem>nucleotide frequencies? (<userinput>S1/tn93:pi[A]</userinput>, <userinput>S1/tn93:pi[C]</userinput>, ... )</listitem>
<listitem>number of indels? (<userinput>#indels</userinput>)</listitem>
</orderedlist>
</para>
</section>
......@@ -124,70 +128,71 @@ The <userinput>-Inone</userinput> is a short form of <userinput>--imodel=none</u
<para>When you don't specify values for parameters like <parameter>imodel</parameter>, bali-phy uses sensible defaults. For example, these two commands are equivalent:
% cd ~/alignment_files/examples/
% bali-phy 5S-rRNA/25-muscle.fasta --test
% bali-phy 5S-rRNA/25-muscle.fasta --test --alphabet=RNA --smodel=TN --imodel=RS07
% bali-phy 5S-rRNA/25-muscle.fasta --test --alphabet=RNA --smodel=tn93 --imodel=rs07
You can change the substitution model from the Tamura-Nei model to the General Time-Reversible model:
% bali-phy 5S-rRNA/25-muscle.fasta --test --smodel=GTR
% bali-phy 5S-rRNA/25-muscle.fasta --test -S gtr
Here the <userinput>-S gtr</userinput> is a short form of <userinput>--smodel=gtr</userinput>, where <parameter>smodel</parameter> means the substitution model.
</para>
</section>
<section><info><title>Rate variation</title></info>
<para>
You can also allow different sites to evolve at 5 different rates using the gamma[4]+INV model of rate heterogeneity:
% bali-phy 5S-rRNA/25-muscle.fasta --test --smodel=GTR+Rates.Gamma[4]+INV
% bali-phy 5S-rRNA/25-muscle.fasta --test -S gtr+Rates.gamma[4]+inv
You can allow 5 different rates that are all independently estimated:
% bali-phy 5S-rRNA/25-muscle.fasta --test --smodel=GTR+DP[n=5]
% bali-phy 5S-rRNA/25-muscle.fasta --test -S gtr+Rates.free[n=5]
</para>
</section>
<section><info><title>Codon models</title></info>
<para>
We can also conduct codon-based analyses using the Nielsen and Yang (1998) M0 model of diversifying positive selection (dN/dS):
% bali-phy Globins/bglobin.fasta --test --smodel=M0
The M0 model takes a nucleotide exchange model as a parameter. This parameter is optional, and the default is HKY, which you could specify as M0[,HKY]. You can change this to be more flexible:
% bali-phy Globins/bglobin.fasta --test --smodel=M0[,GTR]
We can also conduct codon-based analyses using the Nielsen and Yang (1998) model of diversifying positive selection (dN/dS):
% bali-phy Globins/bglobin.fasta --test -S yn94+f1x4
The yn94 model takes a nucleotide exchange model as a parameter. This parameter is optional, and the default is hky85, which you could specify as yn94[,hky85_sym]. You can change this to be more flexible:
% bali-phy Globins/bglobin.fasta --test -S yn94[,gtr_sym]+f1x4
You can make the codon frequencies to be generated from a single set of nucleotide frequencies:
% bali-phy Globins/bglobin.fasta --test --smodel=M0[,GTR]+MG94
% bali-phy Globins/bglobin.fasta --test -S yn94[,gtr_sym]+mg94
The M7 model allows different sites to have different dN/dS values, where the probability of dN/dS values follows a beta distribution:
% bali-phy Globins/bglobin.fasta --test --smodel=M7
% bali-phy Globins/bglobin.fasta --test -S m7
The M7 model has parameters as well. Here are the defaults:
% bali-phy Globins/bglobin.fasta --test --smodel=M7[4,HKY,F61]
% bali-phy Globins/bglobin.fasta --test -S m7[4,hky85_sym,f61]
The M3 model allows different sites to have different dN/dS values, but directly estimates what these values are:
% bali-phy Globins/bglobin.fasta --test --smodel=M3[n=3]
% bali-phy Globins/bglobin.fasta --test -S m3[n=3]
The M8a_Test model allows testing for positive selection in some fraction of the sites:
% bali-phy Globins/bglobin.fasta --test --smodel=M8a_Test[4,HKY,F3x4]
% bali-phy Globins/bglobin.fasta --test -S m8a_test[4,hky85_sym,f3x4]
</para>
</section>
<section><info><title>Fixing parameter values</title></info>
<para>
We can use the TN+Gamma[4]+INV model without specifying parameters:
% bali-phy Globins/bglobin.fasta --test --smodel=TN+Rates.Gamma+INV
We can use the TN93+Gamma[4]+INV model without specifying parameters:
% bali-phy Globins/bglobin.fasta --test -S tn93+Rates.gamma+inv
However, we can also fix parameter values:
% bali-phy Globins/bglobin.fasta --test --smodel=TN+Rates.Gamma[n=4,alpha=1]+INV[pInv=0.2]
% bali-phy Globins/bglobin.fasta --test -S tn93+Rates.gamma[n=4,alpha=1]+inv[p_inv=0.2]
Here we have set the shape parameter for the Gamma distribution to 1, and the
fraction of invariant sites to 20%. Since these parameters are fixed, they will
not be estimated and their values will not be shown in the log file.
</para>
<para>
You can see the parameters for a model by using the <userinput>help</userinput> command, as in:
% bali-phy help Rates.Gamma
% bali-phy help Rates.gamma
This will show the default value or default prior for each parameter, if there is one.
</para>
</section>
<section><info><title>Priors</title></info>
<para>
By default the fraction of invariant sites follows a Uniform[0,1] distribution:
% bali-phy help INV
By default the fraction of invariant sites follows a uniform[0,1] distribution:
% bali-phy help inv
However, we can specify an alternative prior:
% bali-phy Globins/bglobin.fasta --test -S TN+Rates.Gamma[n=4]+INV[pInv~Uniform[0,0.2]]
% bali-phy Globins/bglobin.fasta --test -S tn93+Rates.gamma[n=4]+inv[p_inv~uniform[0,0.2]]
We can also specify parameters as positional arguments instead of using variable names:
% bali-phy Globins/bglobin.fasta --test -S TN+Rates.Gamma[4]+INV[~Uniform[0,0.2]]
Here "<userinput>~</userinput>" indicates a sample from the Uniform distribution instead of the distribution
% bali-phy Globins/bglobin.fasta --test -S tn93+Rates.gamma[4]+inv[~uniform[0,0.2]]
Here "<userinput>~</userinput>" indicates a sample from the uniform distribution instead of the distribution
itself.
</para>
<para>
The insertion-deletion model also has parameters.
% bali-phy help RS07
Here the default value for meanIndelLength is Add[1,~Exponential[10]]. This indicates
% bali-phy help rs07
Here the default value for rs07:mean_length is exponential[10,1]. This indicates
a random value that is obtained by sampling an Exponential random variable with mean 10
and then adding 1 to it.
</para>
......@@ -290,7 +295,7 @@ parameters. This is described in more detail in section 4.3 of the <link xmlns:
Now let's specify different substitution models for different partitions.
% cd ~/alignment_files/examples/ITS
% bali-phy {ITS1,5.8S,ITS2}.fasta --smodel=1:GTR --smodel=2:HKY --smodel=3:TN --test
% bali-phy {ITS1,5.8S,ITS2}.fasta -S 1:gtr -S 2:hky85 -S 3:tn93 --test
</para>
<para>
</para>
......@@ -299,8 +304,8 @@ Now lets specify different substitution models for different partitions.
<section><info><title>Disabling alignment estimation for some partitions</title></info>
<para>
We can also disable alignment estimation for some, but not all, partitions:
% bali-phy {ITS1,5.8S,ITS2}.fasta --imodel=1:RS07 --imodel=2:none --imodel=3:RS07 --test
Specifying <userinput>--imodel=none</userinput> removes the insertion-deletion
% bali-phy {ITS1,5.8S,ITS2}.fasta -I 1:rs07 -I 2:none -I 3:rs07 --test
Specifying <userinput>-I none</userinput> removes the insertion-deletion
model and parameters for partition 2 and also disables alignment estimation for that partition.</para>
<para>Note that there is no longer an I3 indel model. Partition #3 now has the I2 indel model.
</para>
......@@ -308,7 +313,7 @@ model and parameters for partition 2 and also disables alignment estimation for
<section><info><title>Sharing model parameters between partitions</title></info>
<para>We can also specify that some partitions with the same model also share the
same parameters for that model:
% bali-phy {ITS1,5.8S,ITS2}.fasta --smodel=1,3:GTR --imodel=1,3:RS07 --smodel=2:TN --imodel=2:none --test
% bali-phy {ITS1,5.8S,ITS2}.fasta -S 1,3:gtr -I 1,3:rs07 -S 2:tn93 -I 2:none --test
This means that the information is <emphasis>pooled</emphasis> between the partitions to better estimate the shared parameters.</para>
<para>Take a look at the model parameters, and the parentheticals after the model descriptions. You should see that there is no longer an S3 substitution model or an I3 indel model. Instead, partitions #1 and #3 share the S1 substitution model and the I1 indel model.
......@@ -317,7 +322,7 @@ This means that the information is <emphasis>pooled</emphasis> between the parti
</section>
<section><info><title>Sharing substitution rates between partitions</title></info>
<para>We can also specify that some partitions share the same scaling factor for branch lengths:
% bali-phy {ITS1,5.8S,ITS2}.fasta --smodel=1,3:GTR --imodel=1,3:RS07 --smodel=2:TN --imodel=2:none --scale=1,3: --test
% bali-phy {ITS1,5.8S,ITS2}.fasta -S 1,3:gtr -I 1,3:rs07 -S 2:tn93 -I 2:none --scale=1,3: --test
This means that the branch lengths for partitions 1 and 3 are the same, instead of being independently estimated.</para>
<para>Take a look at the model parameters. There is no longer a Scale[3] parameter. Instead, partitions 1 and 3 share Scale[1].</para>
</section>
......@@ -329,8 +334,8 @@ a text file called <filename>analysis1.script</filename>:
<programlisting>align = ITS1.fasta
align = 5.8S.fasta
align = ITS2.fasta
smodel = 1,3:TN+DP[n=3]
smodel = 2:TN
smodel = 1,3:tn93+Rates.free[n=3]
smodel = 2:tn93
imodel = 2:none
scale = 1,3:
</programlisting>
......
This diff is collapsed.
% Guide to development in BAli-Phy
# Getting started
## Fork the repo
1. Click the "Fork" button on [https://github.com/bredelings/BAli-Phy/](https://github.com/bredelings/BAli-Phy). This will create a separate copy of the repo under your own account.
1. Use `git clone` to download your own version of the repo:
``` sh
git clone git@github.com:your-username/BAli-Phy.git
```
The name `origin` in your local repo will then refer to your modified version of BAli-Phy.
1. To refer to the official upstream version, create a new remote called `upstream`:
``` sh
cd BAli-Phy/
git remote add upstream git@github.com:bredelings/BAli-Phy.git
git remote -v
```
## Repo overview
These directories contain code that affects how `bali-phy` runs:
[modules/](https://github.com/bredelings/BAli-Phy/blob/master/modules)
: Haskell code
[src/](https://github.com/bredelings/BAli-Phy/blob/master/src)
: C++14 code
[functions/](https://github.com/bredelings/BAli-Phy/blob/master/functions)
: JSON definitions of functions for the command-line interface
[help/](https://github.com/bredelings/BAli-Phy/blob/master/help)
: Help files
These directories contain documentation and examples:
[doc/](https://github.com/bredelings/BAli-Phy/blob/master/doc/)
: Documentation
[doc/man/](https://github.com/bredelings/BAli-Phy/blob/master/doc/man/)
: Markdown files for generating UNIX manual pages
[examples/sequences/](https://github.com/bredelings/BAli-Phy/blob/master/examples/sequences/)
: Example sequences
[examples/models/](https://github.com/bredelings/BAli-Phy/blob/master/examples/models/)
: Example files for running graphical models.
## Contributions
We are excited to see what you will contribute!
The way to submit patches is:
1. First develop changes in your own repo.
1. Send a [pull request](https://help.github.com/articles/about-pull-requests/) through github.
1. CI tests will run automatically on the suggested changes.
1. We will review the changes.
1. If accepted, changes will be merged to the master branch.
# Building bali-phy
## Prerequisites
You will need a C++ compiler that understands C++14.
* gcc 5 (or higher) works
* clang 3.5 (or higher) works
* XCode 6.0 (or higher) works
You will also need to install
* cairo graphics library (optional, but required to build the `draw-tree` program)
To build the executables, you will need
* meson
* ninja
To build the documentation, you will need
* pandoc
On Debian and Ubuntu systems you can install all the prerequisites with the following command:
``` sh
sudo apt-get install g++ libcairo2-dev ninja-build meson pandoc
```
## Compilation
``` sh
cd BAli-Phy/
meson build --prefix=$HOME/Applications/bali-phy
cd build
ninja install
```
# Adding functionality to bali-phy
## Adding a Haskell function
Haskell functions are defined in the Haskell modules under [modules/](https://github.com/bredelings/BAli-Phy/blob/master/modules/). For example, the function `min` is defined in `Prelude.hs` as follows:
``` Haskell
min x y = if (x <= y) then x else y;
```
To add a Haskell function, you simply need to define a function in one of these modules. However, be aware that the Haskell parser in bali-phy is not very advanced, and so you will need to specify `{`, `}`, and `;` in places where they are optional in standard Haskell.
## Adding a C++ function
To add a "builtin" C++ operation to bali-phy's Haskell code, you must add the C++ code for the operation to one of the C++ files in the [src/builtins/](https://github.com/bredelings/BAli-Phy/blob/master/src/builtins) directory. You must then declare the builtin in one of the Haskell files in the [modules/](https://github.com/bredelings/BAli-Phy/blob/master/modules/) directory.
### Declaring a builtin in Haskell
A builtin is declared via the following syntax:
``` Haskell
builtin haskell_name number_of_arguments "c++ name" "module name";
```
For example, the Haskell function `poisson_density` is declared with the following line from [modules/Distributions.hs](https://github.com/bredelings/BAli-Phy/blob/master/modules/Distributions.hs):
``` Haskell
builtin poisson_density 2 "poisson_density" "Distribution";
```
The first two arguments specify the Haskell name (`poisson_density`) and the number of arguments in Haskell (`2`). The C++ function name is derived from the third argument (`poisson_density`) by adding `builtin_function_` in front. So the C++ function will be called `builtin_function_poisson_density`. The last argument specifies which loadable module contains the C++ function. Since this function is in the module "Distribution", its source code goes in [src/builtins/Distribution.cc](https://github.com/bredelings/BAli-Phy/blob/master/src/builtins/Distribution.cc).
### Writing a builtin in C++
The C++ function for a builtin must be defined in one of the C++ files in the [src/builtins](https://github.com/bredelings/BAli-Phy/blob/master/src/builtins) directory, and the function name must begin with `builtin_function_`. The function must also be declared `extern "C"` (to avoid name mangling).
For example, the poisson density function is written in [src/builtins/Distribution.cc](https://github.com/bredelings/BAli-Phy/blob/master/src/builtins/Distribution.cc) as follows:
``` C++
extern "C" closure builtin_function_poisson_density(OperationArgs& Args)
{
double mu = Args.evaluate(0).as_double();
int n = Args.evaluate(1).as_int();
return { poisson_pdf(mu,n) };
}
```
Input:
* The function takes a single `OperationArgs& Args` argument.
* The `n`th argument is fetched by calling `Args.evaluate(n)`, and is of type `expression_ref` ([src/computation/expression/expression_ref.H](https://github.com/bredelings/BAli-Phy/blob/master/src/computation/expression/expression_ref.H))
* The `expression_ref` can be converted to `int`, `double`, or `log_double_t` using the methods `.as_int()`, `.as_double()` and `.as_log_double()`.
Output:
* The function returns a `closure` object ([src/computation/closure.H](https://github.com/bredelings/BAli-Phy/blob/master/src/computation/closure.H))
* A closure can be created from a `double` or `int`. Here an explicit conversion is invoked by surrounding a `log_double_t` with curly braces.
## Adding a distribution
Distributions are defined in [modules/Distributions.hs](https://github.com/bredelings/BAli-Phy/blob/master/modules/Distributions.hs).
For a distribution, you need to add a function that constructs a ProbDensity object.
``` Haskell
name parameters = ProbDensity (density parameters) (quantile parameters) (sample parameters) (range parameters);
```
For example, the Normal distribution is defined as:
``` Haskell
builtin normal_density 3 "normal_density" "Distribution";
builtin normal_quantile 3 "normal_quantile" "Distribution";
builtin builtin_sample_normal 2 "sample_normal" "Distribution";
sample_normal m s = Random (IOAction2 builtin_sample_normal m s);
normal m s = ProbDensity (normal_density m s) (normal_quantile m s) (sample_normal m s) realLine;
```
### Density
A density function takes an extra argument after the distribution parameters. For example, the normal density takes 3 arguments, so that `(normal_density m s)` is a function of the third argument.
A density function should return type `log_double_t`.
### Quantile
A quantile function takes an extra argument after the distribution parameters. For example, the normal quantile takes 3 arguments, so that `(normal_quantile m s)` is a function of the third argument. The extra argument should have type `double`, and ranges from 0 to 1.
If the function is not univariate, or does not have a quantile function, set the quantile function to `(no_quantile "distribution name")`. This will later change to use polymorphism, where only 1-dimensional functions will have a quantile attribute.
### Sample
To construct a random sample from a C++ procedure, access the `n`th parameter via `Args.evaluate_(n)` (with an underscore) instead of `Args.evaluate(n)`.
For example:
``` C++
extern "C" closure builtin_function_sample_normal(OperationArgs& Args)
{
double a1 = Args.evaluate_(0).as_double();
double a2 = Args.evaluate_(1).as_double();
return { gaussian(a1, a2) };
}
```
Then use one of the following patterns, depending on how many arguments your sampling routine takes:
``` Haskell
sample_dist arg1 = Random (IOAction1 builtin_sample_dist arg1);
sample_dist arg1 arg2 = Random (IOAction2 builtin_sample_dist arg1 arg2);
sample_dist arg1 arg2 arg3 = Random (IOAction3 builtin_sample_dist arg1 arg2 arg3);
```
For example:
``` Haskell
builtin builtin_sample_normal 2 "sample_normal" "Distribution";
sample_normal m s = Random (IOAction2 builtin_sample_normal m s);
```
The `(dist_sample parameters)` function returns an object in the Random monad, where executing a distribution has the semantics of sampling from the distribution. The sampling procedure can also call other actions in the Random monad. For example, here we sample from the distribution `(dist2 args)` and transform the result.
``` Haskell
sample_dist args = do { x <- dist2 args; return (f x);}
```
### Range
Ranges for real numbers are:
* above x
* below x
* between x y
* realLine
Ranges for Integers are:
* integer_above i
* integer_below i
* integer_between i j
In each case, the range includes the bounds. For example, `(integer_above 0)` includes `0`, and `(integer_between 0 1)` includes `0` and `1`.
Ranges for Booleans are:
* TrueFalseRange
Ranges for simplices are
* Simplex n sum
where `n` is the number of dimensions, and `sum` is the sum of the values (usually `1.0`).
## Using a function from the command line
To make a Haskell function accessible from the command line, you must add a JSON file to the directory `functions/` that registers the Haskell function.
For example, the file `functions/HKY.json` allows the user to specify (for example) `-S HKY[kappa=2]` as a substitution model. It connects the command line phrase `HKY[kappa=2]` with the Haskell function `SModel.hky` defined in the file `modules/SModel.hs`.
The JSON looks like this:
``` json
{
"name": "HKY",
"title": "The Hasegawa-Kishino-Yano (1985) nucleotide rate matrix",
"result_type": "ExchangeModel[a]",
"constraints": ["Nucleotides[a]"],
"citation":{"type": "article",
"title": "Dating of the human-ape splitting by a molecular clock of mitochondrial DNA",
"year": "1985",
"author": [{"name": "Hasegawa, Masami"}, {"name": "Kishino, Hirohisa"}, {"name": "Yano, Taka-aki"}],
"journal": {"name": "Journal of molecular evolution", "volume": "22"},
"identifier": [{"type":"doi","id":"10.1007/BF02101694"}]
},
"call": "SModel.hky[kappa,alphabet]",
"args": [
{
"arg_name": "kappa",
"arg_type": "Double",
"default_value": "~logNormal[log[2],0.25]",
"description": "Transition\/transversion ratio"
},
{
"arg_name": "alphabet",
"arg_type": "a",
"default_value": "getAlphabet"
}
],
"description":"Technically, this is just the symmetric matrix from HKY"
}
```
The fields are defined as follows:
`name`
: specifies how this function will be invoked on the command line.
`call`
: specifies which Haskell function to call and the order of the arguments to pass.
`result_type`
: specifies the result type of the function.
`args`
: describes the list of named arguments
`args.arg_name`
: gives the name of each argument
`args.arg_type`
: gives the type of each argument
`args.default_value`
: gives a value for the argument if not specified (optional).
`args.description`
: gives a short phrase describing the argument (optional).
`description`
: gives a longer description of the function (optional).
`title`
: gives a title for the function (optional).
## Adding a new MCMC move
Most moves are currently defined in C++. Moves are actually added to the sampler in [src/mcmc/setup.cc](https://github.com/bredelings/BAli-Phy/blob/master/src/mcmc/setup.cc).
### `MCMC::MoveAll`
You can add other moves as sub-moves to an `MCMC::MoveAll`:
``` C++
MCMC::MoveAll M;
M.add(weight, MCMC::MH_Move( Proposal2M(proposal, m_index, parameters), name) );
```
The weight determines how many times the sub-move is run each iteration.
### `MCMC::SingleMove`
To add a generic MCMC move, create an `MCMC::SingleMove` with one of the following constructors:
``` C++
SingleMove(void (*move)(owned_ptr<Model>&,MoveStats&), const std::string& name);
SingleMove(void (*move)(owned_ptr<Model>&,MoveStats&), const std::string& name, const std::string& attributes);
```
You can pass in a function with signature `void(owned_ptr<Model>&,MoveStats&)` that performs the move. This is how moves that alter alignments are defined.
We use an `owned_ptr<>` so that we can treat Model& polymorphically.
### `MCMC::MH_Move`
The `MCMC::MH_Move` has the following constructors:
``` C++
MH_Move(const Proposal& P, const std::string& name);
MH_Move(const Proposal& P, const std::string& name, const std::string& attributes);
```
### Proposals
Proposals are defined in [src/mcmc/proposals.H](https://github.com/bredelings/BAli-Phy/blob/master/src/mcmc/proposals.H).
Proposals are generally defined as functions that alter the MCMC state and then return a proposal ratio:
``` C++
class Proposal: public Object {
public:
Proposal* clone() const =0;
virtual log_double_t operator()(Model& P) const=0;
};
```
Here `Model&` is the current state of the MCMC object. The type `log_double_t` is a probability (or probability density) represented on the log scale.
#### Proposal2
The Proposal2 class has constructor:
``` C++
Proposal2(const Proposal_Fn& p, const std::vector<std::string>& s, const std::vector<std::string>& v, const Model& P);
```
The names in `s` are names of variables to modify, and the names in `v` are names of keys to look up to find tunable parameters such as jump sizes.
#### Proposal_Fn
The `Proposal_Fn` class represents an MCMC move that affects some number of variables `x`, with some number of tunable parameters `p`.
``` C++
class Proposal_Fn
{
public:
virtual log_double_t operator()(std::vector< expression_ref >& x,const std::vector<double>& p) const;
};
```
It is possible to compose `Proposal_Fn`s to create complex proposals, such as:
1. ``Reflect(bounds, shift_cauchy)``
2. ``log_scaled(between(-20, 20, shift_cauchy))``
3. ``log_scaled(between(-20, 20, shift_gaussian))``
# Types
## `log_double_t`
This is a positive real number represented in terms of its logarithm. Operators have been defined so that you can multiply, add, subtract, and divide this type.
## `Object`
All C++ objects that are accessed via Haskell code inherit from this type.
## `expression_ref`
An `expression_ref` is basically either an atomic value or an Object followed by a list of `expression_ref`s.
See [src/computation/expression/expression_ref.H](https://github.com/bredelings/BAli-Phy/blob/master/src/computation/expression/expression_ref.H)
## `closure`
A closure is an `expression_ref` with an environment.
See [src/computation/closure.H](https://github.com/bredelings/BAli-Phy/blob/master/src/computation/closure.H)
# Testing
BAli-Phy currently has two test suites.
## The `tests/` directory
## testiphy
......@@ -8,6 +8,17 @@ if pandoc.found()
manpage = prog+'.1'
custom_target(manpage,command:[pandoc,'-s','-t','man',markdown], output:manpage, capture: true, install: true, install_dir: join_paths(get_option('mandir'),'man1'))
endforeach
# Install the html for the dev guide
dev_guide_md =files('developer.md')
custom_target('developer.html',
command:[pandoc,'-s','-N','--css','doc.css','-t','html','--toc',dev_guide_md],
output:'developer.html',
capture: true,
install: true,
install_dir: join_paths(get_option('datadir'),'doc/bali-phy')
)
else
warning('Program "pandoc" not found! Cannot generate man pages')
endif
{
"name": "Beta",
"result_type": "Distribution[Double]",
"call": "Distributions.beta[a,b]",
"args": [
{
"arg_name": "a",
"arg_type": "Double"
},
{
"arg_name": "b",
"arg_type": "Double"
}
]
}
{
"name": "Exponential",
"result_type": "Distribution[Double]",
"call": "Distributions.exponential[mean]",
"args": [
{
"arg_name": "mean",
"arg_type": "Double"
}
]
}
{
"name": "DP",
"name": "Rates.free",
"synonyms": ["DP"],
"result_type": "MixtureModel[a]",
"call": "SModel.dp[submodel,rates,frequencies]",
"args": [
{
"arg_name": "rates",
"arg_type": "List[Double]",
"default_value": "~Dirichlet[n,2]",
"default_value": "~dirichlet[n,2]",
"description" : "Rates for each category"
},
{
"arg_name": "frequencies",
"arg_type": "List[Double]",
"default_value": "~Dirichlet[n,3]",
"default_value": "~dirichlet[n,3]",
"description" : "Frequencies for each category"
},
{
......@@ -29,5 +30,5 @@
],
"title": "Free rates model",
"description":"Rate heterogeneity model where the rate and frequency of each category can be estimated from the data.",
"examples": ["HKY+DP[n=3]"]
"examples": ["hky85+Rates.free[n=3]"]
}
{
"name": "Rates.Gamma",
"name": "Rates.gamma",
"synonyms": ["Rates.Gamma"],
"result_type": "MixtureModel[a]",
"call": "SModel.gamma_rates[submodel,alpha,n]",
"args": [
......@@ -12,7 +13,7 @@
{
"arg_name": "alpha",
"arg_type": "Double",
"default_value": "~logLaplace[-6,2]",
"default_value": "~log_laplace[-6,2]",
"description": "The shape parameter for the Gamma distribution"
},
{
......@@ -21,7 +22,7 @@
"description": "The model being scaled"
}
],
"examples": ["GTR+Rates.Gamma[4]+INV"],
"examples": ["gtr+Rates.gamma[4]+inv"],
"title": "The discrete-Gamma rate heterogeneity model",
"description": "This describes rate-heterogeneity across sites, where the rates for each site follow a Gamma distribution",
"citation":{"type": "article",
......
{
"name": "Rates.logNormal",
"name": "Rates.log_normal",
"synonyms": ["Rates.log_normal"],
"result_type": "MixtureModel[a]",
"call": "SModel.log_normal_rates[submodel,lsigma,n]",
"args": [
......@@ -13,7 +14,7 @@
"arg_name": "lsigma",
"arg_type": "Double",
"description": "The standard deviation parameter",
"default_value": "~logLaplace[-3,1]"
"default_value": "~log_laplace[-3,1]"
},
{
"arg_name": "submodel",
......@@ -21,5 +22,5 @@
"description": "The model being scaled"
}
],
"examples": ["GTR+Rates.logNormal[4]+INV"]
"examples": ["gtr+Rates.log_normal[4]+inv"]
}