Skip to content
Commits on Source (3)
......@@ -63,7 +63,7 @@ if (CMAKE_INSTALL_PREFIX)
endif (CMAKE_INSTALL_PREFIX)
include (GNUInstallDirs)
find_package (bpp-phyl-omics 2.0.0 REQUIRED)
find_package (bpp-phyl-omics 3.0.0 REQUIRED)
#Find boost libraries
SET(Boost_USE_STATIC_LIBS ${BUILD_STATIC})
......@@ -152,10 +152,10 @@ ADD_SUBDIRECTORY(man)
# Packager
SET(CPACK_PACKAGE_NAME "maffilter")
SET(CPACK_PACKAGE_VENDOR "Julien Y. Dutheil")
SET(CPACK_PACKAGE_VERSION "1.2.1")
SET(CPACK_PACKAGE_VERSION "1.3.0")
SET(CPACK_PACKAGE_VERSION_MAJOR "1")
SET(CPACK_PACKAGE_VERSION_MINOR "2")
SET(CPACK_PACKAGE_VERSION_PATCH "1")
SET(CPACK_PACKAGE_VERSION_MINOR "3")
SET(CPACK_PACKAGE_VERSION_PATCH "0")
SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Filtering of genome alignment in the Multiple Alignment Format (MAF)")
SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE")
SET(CPACK_RESOURCE_FILE_AUTHORS "${CMAKE_SOURCE_DIR}/AUTHORS")
......
29/04/18 -*- Version 1.3.0 -*-
13/04/18 Julien Dutheil
* Error output when not enough ingroup sequences in DiversityStatistics and SiteFrequencySpectrum
09/03/18 Julien Dutheil
* Logfile can now be disabled
* Merge filter can now rename chimeric contigs to avoid long concatenated names.
21/01/18 Julien Dutheil
* Added OrderFilter.
16/12/17 Julien Dutheil
* Added OutputAsTable filter
* Improved LiftOver
18/05/17 -*- Version 1.2.0 -*-
13/04/17 Julien Dutheil
......
......@@ -12,7 +12,7 @@ foreach (target ${maffilter-targets})
if (BUILD_STATIC)
target_link_libraries (${target} ${BPP_LIBS_STATIC})
target_link_libraries (${target} ${LIBS})
set_target_properties (${target} LINK_SEARCH_END_STATIC TRUE)
set_target_properties (${target} PROPERTIES LINK_SEARCH_END_STATIC TRUE)
else (BUILD_STATIC)
target_link_libraries (${target} ${BPP_LIBS_SHARED})
target_link_libraries (${target} ${LIBS})
......
This diff is collapsed.
......@@ -83,7 +83,7 @@ class OutputAsFeaturesMafIterator:
public:
MafBlock* analyseCurrentBlock_() throw (Exception) {
MafBlock* analyseCurrentBlock_() {
currentBlock_ = iterator_->nextBlock();
if (output_ && currentBlock_)
writeBlock(*output_, *currentBlock_);
......
......@@ -42,7 +42,7 @@ knowledge of the CeCILL license and that you accept its terms.
using namespace bpp;
using namespace std;
MafBlock* SystemCallMafIterator::analyseCurrentBlock_() throw (Exception) {
MafBlock* SystemCallMafIterator::analyseCurrentBlock_() {
currentBlock_ = iterator_->nextBlock();
if (! currentBlock_)
return 0;
......
......@@ -100,7 +100,7 @@ class SystemCallMafIterator:
public:
MafBlock* analyseCurrentBlock_() throw (Exception);
MafBlock* analyseCurrentBlock_();
};
......
......@@ -45,7 +45,7 @@ knowledge of the CeCILL license and that you accept its terms.
using namespace bpp;
using namespace std;
MafBlock* TreeBuildingSystemCallMafIterator::analyseCurrentBlock_() throw (Exception) {
MafBlock* TreeBuildingSystemCallMafIterator::analyseCurrentBlock_() {
currentBlock_ = iterator_->nextBlock();
if (! currentBlock_)
return 0;
......
......@@ -105,7 +105,7 @@ class TreeBuildingSystemCallMafIterator:
public:
MafBlock* analyseCurrentBlock_() throw (Exception);
MafBlock* analyseCurrentBlock_();
};
......
# MafFilter
The MafFilter genome alignment processor [[website]](https://jydu.github.io/maffilter/).
If you are using maffilter, please consider citing
```
Dutheil JY, Gaillard S, Stukenbrock EH.
BMC Genomics. 2014 Jan 22;15:53.
MafFilter: a highly flexible and extensible multiple genome alignment files processor.
```
theme: jekyll-theme-tactile
\ No newline at end of file
maffilter (1.2.1-1+dfsg-2) UNRELEASED; urgency=medium
maffilter (1.3.0+dfsg-1) UNRELEASED; urgency=medium
[ Steffen Moeller ]
* Added ref to OMICtools registry
(bio.tools and RRID entries not available)
-- Steffen Moeller <moeller@debian.org> Sat, 29 Jul 2017 19:29:20 +0200
[ Julien Y. Dutheil ]
* New upstream version (Closes: #896486).
* Standards-Version: 4.1.4
* debhelper 11
* Updated Vcs-Browser and Vcs-Git
* Updated Homepage
-- Julien Dutheil <julien.dutheil@univ-montp2.fr> Mon, 30 Apr 2018 11:57:36 +0200
maffilter (1.2.1+dfsg-1) unstable; urgency=medium
......
......@@ -4,17 +4,17 @@ Uploaders: Andreas Tille <tille@debian.org>,
Julien Dutheil <julien.dutheil@univ-montp2.fr>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11~),
cmake,
texinfo,
libbpp-phyl-omics-dev (>= 2.3.1),
libbpp-phyl-omics-dev (>= 2.4.0),
libboost-iostreams-dev (>= 1.54.0),
zlib1g-dev,
libbz2-dev
Standards-Version: 3.9.8
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/maffilter.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/maffilter.git
Homepage: http://biopp.univ-montp2.fr/forge/maffilter
Standards-Version: 4.1.4
Vcs-Browser: https://salsa.debian.org/med-team/maffilter
Vcs-Git: https://salsa.debian.org/med-team/maffilter.git
Homepage: https://jydu.github.io/maffilter/
Package: maffilter
Architecture: any
......
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2d4085b..0ee0c4d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,8 @@ else ()
# Generate command line args (always add -c to output compressed file to stdout)
if (${COMPRESS_PROGRAM} STREQUAL "gzip")
# -n for no timestamp in files (reproducible builds)
- set (COMPRESS_ARGS -c -n)
+ # -9 for maximum compression (lintian error)
+ set (COMPRESS_ARGS -c -n -9)
else ()
set (COMPRESS_ARGS -c)
endif ()
fix_doc_compression.patch
#!/usr/bin/make -f
# Debian packaging rules for maffilter; every target is delegated to the dh sequencer below.
# DH_VERBOSE := 1
#export DH_VERBOSE=1
# Request all hardening build flags from dpkg-buildflags.
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
# Append -DNDEBUG and -g to the C and C++ compiler flags used for the package build.
export DEB_CFLAGS_MAINT_APPEND=-DNDEBUG -g
export DEB_CXXFLAGS_MAINT_APPEND=-DNDEBUG -g
# Catch-all pattern rule: forward whatever target was requested to dh.
%:
dh $@
# Configure step: run the default dh_auto_configure, passing the RelWithDebInfo build type on to CMake.
override_dh_auto_configure:
dh_auto_configure -- -DCMAKE_BUILD_TYPE=RelWithDebInfo
# Permission fix-up: after the default dh_fixperms, clear the executable bit on any
# *.bpp file found under usr/share in the package staging directories.
override_dh_fixperms:
dh_fixperms
find debian/*/usr/share -name "*.bpp" -executable -exec chmod -x \{\} \;
\input texinfo @c -*-texinfo-*-
@c %**start of header
@setfilename maffilter.info
@settitle MafFilter Manual 1.2.1
@settitle MafFilter Manual 1.3.0
@c @documentencoding UTF-8
@afourpaper
@dircategory Science Biology Genetics
......@@ -12,9 +12,9 @@
@copying
This is the manual of MafFilter, version 1.2.1.
This is the manual of MafFilter, version 1.3.0.
Copyright @copyright{} 2017 Julien Y. Dutheil
Copyright @copyright{} 2018 Julien Y. Dutheil
@end copying
@titlepage
......@@ -65,6 +65,7 @@ Extracting data
* SelectOrphans::
* Merge::
* Concatenate::
* OrderFilter::
* ExtractFeature::
* SelectChr::
* WindowSplit::
......@@ -115,6 +116,7 @@ Exporting blocks and data
* Output::
* OutputAlignments::
* OutputAsTable::
* VcfOutput::
* MsmcOutput::
* PlinkOutput::
......@@ -340,6 +342,7 @@ The next section present all available filters and their corresponding arguments
* SelectOrphans::
* Merge::
* Concatenate::
* OrderFilter::
* ExtractFeature::
* SelectChr::
* WindowSplit::
......@@ -451,7 +454,8 @@ maf.filter= \
Merge( \
species=(species1, species2, etc), \
dist_max=0, \
ignore_chr=(Random,Unknown)), \
ignore_chr=(Random,Unknown), \
rename_chimeric_chromosomes=yes), \
[...]
@end example
@end cartouche
......@@ -473,11 +477,15 @@ If dist_max is greater than 0, sequences will be filled with 'N' to preserve coo
An optional parenthetic list of chromosomes to ignore (typically "Unknown", or "Random", etc.).
Sequences annotated with such chromosomes will not be checked for synteny and the corresponding block will not be fused.
@item rename_chimeric_chromosomes=@{boolean@}
When distinct chromosomes are merged (in non specified species), their names are concatenated. If this option is set to "yes",
then merged chromosomes will be renamed as "chimtigXXX" where XXX is a number auto-incremented for each species.
@end table
@c ------------------------------------------------------------------------------------------------------------------
@node Concatenate, ExtractFeature, Merge, Extracting
@node Concatenate, OrderFilter, Merge, Extracting
@subsection Concatenating: merge consecutive blocks up to a certain size
The @command{Concatenate} filter fuses consecutive blocks until the concatenated block reaches a minimal size.
......@@ -507,7 +515,46 @@ If given, only blocks with identical chromosome tags in the reference species wi
@c ------------------------------------------------------------------------------------------------------------------
@node ExtractFeature, SelectChr, Concatenate, Extracting
@node OrderFilter, ExtractFeature, Concatenate, Extracting
@subsection OrderFilter: check and optionally discard unordered or overlapping blocks
The @command{OrderFilter} filter checks whether each block is sorted relative to the previous block.
@heading Synopsis:
@cartouche
@example
maf.filter= \
[...],
OrderFilter( \
reference=ref_genome, \
do_unsorted=discard, \
do_overlapping=discard), \
[...]
@end example
@end cartouche
@heading Arguments:
@table @command
@item reference=@{string@}
The sequence for which coordinates should be checked. Blocks where the reference sequence is absent will be considered as ordered.
Important note: the alignment is supposed to be projected according to the reference sequence. Using the filter on another species might lead to unexpected behavior, in particular as the reference species is supposed to be mapped on the + strand only.
@item do_unsorted=@{none|discard|error@}
Action to perform in case an unsorted block is found (that is, when the right coordinate of the current block is before the left coordinate of the previous one).
If 'discard', the block will be removed. If 'error', maffilter will stop with an error.
@item do_overlapping=@{none|discard|error@}
Action to perform in case an overlapping block is found (that is, when the left coordinate of the current block is before the right coordinate of the previous one, and the right coordinate of the current block is not before the left coordinate of the previous one).
If 'discard', the block will be removed. If 'error', maffilter will stop with an error.
@end table
@c ------------------------------------------------------------------------------------------------------------------
@node ExtractFeature, SelectChr, OrderFilter, Extracting
@subsection Extract features from the alignment
The @command{ExtractFeature} extracts parts of the alignment corresponding to certain features.
......@@ -1399,7 +1446,7 @@ First species to consider (resp. second, third and fourth).
@subsubsection Site frequency spectrum
The @command{SiteFrequencySpectrum} computes the site frequency spectrum for each block.
Only positions in the alignment with only two states are considered.
The proportions of doubletons are then computed by bins.
The proportions of bi-allelic sites are then computed by bins.
Let's consider the following example with 7 sequences:
@verbatim
ACGT
......@@ -1416,7 +1463,7 @@ It is possible to compute all these frequencies individually by settingthe @comm
@verbatim
SiteFrequencySpectrum(bounds=(-0.5,0.5,1.5,2.5,3.5,4.5), ...)
@end verbatim
which will output the number of site with 0, 1, 2, 3 or 4 minor states. Sites with more than 2 states are always ocounted separately, as well as sites containing unresolved characters.
which will output the number of sites with 0, 1, 2, 3 or 4 minor states. Sites with more than 2 states are always counted separately, as well as sites containing unresolved characters.
If one wants to count only constant sites for instance, one can simply type
@verbatim
SiteFrequencySpectrum(bounds=(-0.5,0.5), ...)
......@@ -1523,7 +1570,7 @@ Note that the ``species'' terminology relates to multispecies alignments, as ori
@command{DiversityStatistics} computes sequence diversity statistics, including:
@itemize
@item
Number of seggregating sites,
Number of segregating sites,
@item
Watterson's theta.
@item
......@@ -1979,6 +2026,7 @@ Compression format for output file (if file != none).
@menu
* Output::
* OutputAlignments::
* OutputAsTable::
* VcfOutput::
* MsmcOutput::
* PlinkOutput::
......@@ -2022,7 +2070,7 @@ Tell if sequences should be masked (if a mask annotation is available), or if ma
@c ------------------------------------------------------------------------------------------------------------------
@node OutputAlignments, VcfOutput, Output, Exporting
@node OutputAlignments, OutputAsTable, Output, Exporting
@subsection Write alignment blocks to an external alignment file.
The @command{OutputAlignments} filter writes all blocks to an external alignment file, potentially losing some information such as coordinates and scores.
......@@ -2074,7 +2122,47 @@ The name of reference sequence, which will only be used when %c, %b or %e codes
@c ------------------------------------------------------------------------------------------------------------------
@node VcfOutput, MsmcOutput, OutputAlignments, Exporting
@node OutputAsTable, VcfOutput, OutputAlignments, Exporting
@subsection Export given positions into a table format.
The @command{OutputAsTable} filter writes the nucleotide content of given positions in a table format.
@heading Synopsis:
@cartouche
@example
maf.filter= \
[...],
OutputAsTable( \
file=data.txt, \
species=(Species1,Species2,Species3), \
reference=RefSpecies, \
compression=none), \
[...]
@end example
@end cartouche
@heading Arguments:
@table @command
@item file=@{none|@{path@}@}
A file path where to write data in table format.
@item compression=@{none|gzip|zip|bzip2@}
Compression format for output file.
@item species=@{list of @{string@}@}
Select species for which data should be output (one column per species).
@item reference=@{string@}
Which species should be used for outputting coordinates.
@end table
@c ------------------------------------------------------------------------------------------------------------------
@node VcfOutput, MsmcOutput, OutputAsTable, Exporting
@subsection Call SNPs from alignment blocks and export to VCF file
The @command{VcfOutput} filter calls SNPs from each block and outputs them to a VCF file.
......@@ -2155,7 +2243,7 @@ Compression format for output file.
A species name corresponding to the sequence to use as reference.
@item genotypes=@{list of species@}
A list of species for which seggregating sites should be called. It might not contain the reference species, which is then only used for coordinates output.
A list of species for which segregating sites should be called. It might not contain the reference species, which is then only used for coordinates output.
Blocks which do not contain all species will not be called, nor will blocks not containing the reference species. Note that the alignment has to be projected
against the reference species, otherwise an error will be reported.
......@@ -2203,7 +2291,7 @@ Compression format for output file.
A species name corresponding to the sequence to use as reference.
@item genotypes=@{list of species@}
A list of species for which seggregating sites should be called. It might not contain the reference species, which is then only used for coordinates output.
A list of species for which segregating sites should be called. It might not contain the reference species, which is then only used for coordinates output.
Blocks which do not contain all species will not be called, nor will blocks not containing the reference species. Note that the alignment has to be projected
against the reference species, otherwise an error will be reported.
......@@ -2362,6 +2450,7 @@ maf.filter= \
LiftOver( \
ref_species=species1, \
target_species=species2, \
target_closest_position=yes, \
feature.file=species1.gff3.gz, \
feature.file.compression=gzip, \
feature.format=GFF, \
......@@ -2381,6 +2470,9 @@ The name of the species for which the coordinates of the features are provided.
@item target_species=@{string@}
The name of the species to which the coordinates of the features should be converted.
@item target_closest_position=@{boolean@}
In case the target sequence has a gap at the given position, outputs the coordinate of the previous non-gap position, otherwise NA.
@item feature.file=@{path@}
The file where the features are described.
......
theme: jekyll-theme-tactile
title: MafFilter
description: a genome alignment processor
show_downloads: true
share: true
**MafFilter** is a program dedicated to the analysis of genome alignments. It parses and manipulates [MAF files](https://genome.ucsc.edu/FAQ/FAQformat.html#format5) as well as simpler fasta files. Besides various filtering options and format conversion tools, **MafFilter** can compute a wide range of statistics (phylogenetic trees, nucleotide diversity, inference of selection, etc.). Current version is 1.2.1.
## What can MafFilter do?
**MafFilter** applies a series of "filters" to a MAF file, in order to clean it, extract data and compute statistics while keeping track of the associated meta-data such as genome coordinates and quality scores.
* It can process the alignment to remove low-quality / ambiguous / masked regions.
* It can export data into a single or multiple alignment files in formats such as Fasta or Clustal.
* It can read annotation data in GFF or GTF format, and extract the corresponding alignment.
* It can perform sliding windows calculations.
* It can reconstruct phylogeny/genealogy along the genome alignment.
* It can compute population genetics statistics, such as site frequency spectrum, number of fixed/polymorphic sites, etc.
## How can I get it?
**MafFilter** is built using the [Bio++ libraries](http://biopp.univ-montp2.fr), as well as the boost iostream library for handling of compressed files. [Debian](https://packages.debian.org/search?keywords=maffilter&searchon=names&suite=all&section=all) and [RPM](https://download.opensuse.org/repositories/home:/jdutheil:/Bio++2.3.0/) packages are available.
For compiling the programs yourself, from the downloaded sources or from the git repository, please follow the instructions from the [Bio++ website](http://biopp.univ-montp2.fr/wiki/index.php/Installation).
## How do I use it?
Several example data sets are distributed along with the source code of the package. A reference manual is also available [here](http://biopp.univ-montp2.fr/manual/html/maffilter/), or can be downloaded as [PDF](http://biopp.univ-montp2.fr/manual/pdf/maffilter/). Questions can be asked on the dedicated forum: [here](https://groups.google.com/forum/?hl=en#!forum/maffilter).
## References
The **MafFilter** program was published in
```
Dutheil JY, Gaillard S, Stukenbrock EH.
BMC Genomics. 2014 Jan 22;15:53.
MafFilter: a highly flexible and extensible multiple genome alignment files processor.
```
Please consider citing this work if you are using the program.
MafFilter was originally developed in the context of the study of the Gorilla genome sequence
```
Scally A, Dutheil JY, Hillier LW, Jordan GE, Goodhead I, Herrero J, Hobolth A, Lappalainen T, Mailund T, Marques-Bonet T, McCarthy S, Montgomery SH, Schwalie PC, Tang YA, Ward MC, Xue Y, Yngvadottir B, Alkan C, Andersen LN, Ayub Q, Ball EV, Beal K, Bradley BJ, Chen Y, Clee CM, Fitzgerald S, Graves TA, Gu Y, Heath P, Heger A, Karakoc E, Kolb-Kokocinski A, Laird GK, Lunter G, Meader S, Mort M, Mullikin JC, Munch K, O'Connor TD, Phillips AD, Prado-Martinez J, Rogers AS, Sajjadian S, Schmidt D, Shaw K, Simpson JT, Stenson PD, Turner DJ, Vigilant L, Vilella AJ, Whitener W, Zhu B, Cooper DN, de Jong P, Dermitzakis ET, Eichler EE, Flicek P, Goldman N, Mundy NI, Ning Z, Odom DT, Ponting CP, Quail MA, Ryder OA, Searle SM, Warren WC, Wilson RK, Schierup MH, Rogers J, Tyler-Smith C, Durbin R.
Nature. 2012 Mar 7;483(7388):169-75.
Insights into hominid evolution from the gorilla genome sequence.
```
and was further developed in the following studies:
```
Stukenbrock EH, Bataillon T, Dutheil JY, Hansen TT, Li R, Zala M, McDonald BA, Wang J, Schierup MH.
Genome Res. 2011 Dec;21(12):2157-66.
The making of a new pathogen: insights from comparative population genomics of the domesticated wheat pathogen Mycosphaerella graminicola and its wild sister species.
Stukenbrock EH, Christiansen FB, Hansen TT, Dutheil JY, Schierup MH.
Proc Natl Acad Sci U S A. 2012 Jul 3;109(27):10954-9.
Fusion of two divergent fungal individuals led to the recent emergence of a unique widespread pathogen species.
```
%define _basename maffilter
%define _version 1.2.1
%define _release 1
%define _prefix /usr
URL: http://bioweb.me/maffilter
Name: %{_basename}
Version: %{_version}
Release: %{_release}
Name: maffilter
Version: 1.3.0
Release: 1%{?dist}
License: CECILL-2.0
Vendor: The Bio++ Project
Source: %{_basename}-%{_version}.tar.gz
Source: %{name}-%{version}.tar.gz
Summary: The Multiple Alignment Format file processor
Group: Productivity/Scientific/Other
Requires: libbpp-phyl-omics2 = 2.3.1
Requires: libbpp-seq-omics2 = 2.3.1
Requires: libbpp-phyl11 = 2.3.1
Requires: libbpp-seq11 = 2.3.1
Requires: libbpp-core3 = 2.3.1
Requires: libbpp-phyl-omics3 = 2.4.0
Requires: libbpp-seq-omics3 = 2.4.0
Requires: libbpp-phyl12 = 2.4.0
Requires: libbpp-seq12 = 2.4.0
Requires: libbpp-core4 = 2.4.0
Requires: zlib
BuildRoot: %{_builddir}/%{_basename}-root
BuildRoot: %{_builddir}/%{name}-root
BuildRequires: cmake >= 2.8.11
BuildRequires: gcc-c++ >= 4.7.0
BuildRequires: groff
BuildRequires: texinfo >= 4.0.0
BuildRequires: libbpp-core3 = 2.3.1
BuildRequires: libbpp-core-devel = 2.3.1
BuildRequires: libbpp-seq11 = 2.3.1
BuildRequires: libbpp-seq-devel = 2.3.1
BuildRequires: libbpp-phyl11 = 2.3.1
BuildRequires: libbpp-phyl-devel = 2.3.1
BuildRequires: libbpp-seq-omics2 = 2.3.1
BuildRequires: libbpp-seq-omics-devel = 2.3.1
BuildRequires: libbpp-phyl-omics2 = 2.3.1
BuildRequires: libbpp-phyl-omics-devel = 2.3.1
BuildRequires: libbpp-core4 = 2.4.0
BuildRequires: libbpp-core-devel = 2.4.0
BuildRequires: libbpp-seq12 = 2.4.0
BuildRequires: libbpp-seq-devel = 2.4.0
BuildRequires: libbpp-phyl12 = 2.4.0
BuildRequires: libbpp-phyl-devel = 2.4.0
BuildRequires: libbpp-seq-omics3 = 2.4.0
BuildRequires: libbpp-seq-omics-devel = 2.4.0
BuildRequires: libbpp-phyl-omics3 = 2.4.0
BuildRequires: libbpp-phyl-omics-devel = 2.4.0
BuildRequires: zlib-devel
%if 0%{?fedora} >= 22
......@@ -110,6 +107,7 @@ rm -rf $RPM_BUILD_ROOT
%{_prefix}/share/info/maffilter.info.%{zipext}
%changelog
* Wed Mar 14 2018 Julien Dutheil <dutheil@evolbio.mpg.de> 1.3.0-1
* Fri Jun 09 2017 Julien Dutheil <dutheil@evolbio.mpg.de> 1.2.1-1
* Wed May 24 2017 Julien Dutheil <dutheil@evolbio.mpg.de> 1.2.0-1
* Fri Sep 26 2014 Julien Dutheil <julien.dutheil@univ-montp2.fr> 1.1.0-1
......