Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
-repo: b4e7c5505112b08d33dd30f4788429ba023e67f0
-node: c43e40a443edbd3c4cac7349d2679540578096f5
+repo: 092c2fe2278cb7f0b18d81faeb4aab98b89dc096
+node: b2f9b3286d4be376805e3b5c26cf141ed375c605
 branch: default
-tag: 2.6.0
+tag: 2.7.5
--- a/.hgignore
+++ b/.hgignore
+syntax: glob
+
+databases/
+*.pyc
+build/
+dist/
+*.egg-info/
--- a/.hgsubstate
+++ b/.hgsubstate
-f8823b8162ddea6533866afd27d5ed1ce6ff22e0 utils/export2graphlan
-0d8cb18ce9996e7ce4043a00294aeb2ed9bfa5f2 utils/hclust2
+c168a100f37e23e2c110849a8d91fac8da49f5bd utils/export2graphlan
+35dfd725e7f024fc6d0edef0cc191c7963108787 utils/hclust2
--- a/.hgtags
+++ b/.hgtags
-b4e7c5505112b08d33dd30f4788429ba023e67f0 2.0_alpha1
-60d254d499e2dd1a8b1cfe344236efa47f823ec6 2.0_beta1
-1b6df65b5a3e9feed0179f855c11fd197fe9a64f 2.0_beta2
-12cceaad3493085c4497898aaeff691913ddb633 2.0_beta3
-616a7debe7937672940130e6c5b26a9ef9e76fcd 2.0.0
-3959b668bbed6150698b594cbbc30a924e5d30e1 2.1.0
-0ef29ae841f52b53176ca264fb9f52f98713eb3c 2.2.0
-5424bb911dfcdb7212ea0949d4faeb6e69cfa61f 2.3.0
-6f2a1673af8565e93fb8e69238141889b7c87361 2.5.0
+092c2fe2278cb7f0b18d81faeb4aab98b89dc096 2.0_alpha1
+c5c90e145ff40fb2fc3827651d572aa9a724ba31 2.0_beta1
+7168beb9750d223736888cefa387252d019f6a10 2.0_beta2
+46a5e65865233da6d05ebded2700f1854bab9878 2.0_beta3
+9943159669e10c4943c1d3440866e93ce536617a 2.0.0
+56fbf765ffaa2b851c0bbd17f26083a6710a404e 2.1.0
+97d782790746b6a1124fac7cadeca7bfa9f797cf 2.2.0
+f3325ec17035523cf9f2ea7736afcc119bd94a89 2.3.0
+e424931b4d94d50cf62381c79c335935415b33b9 2.5.0
+6d6433aa0f6856bff2e84a757b4084736bc3738f 2.6.0
+8963e486f79043c79a299f7a684e4550b0115c32 2.7.0
+d8ab9ca4244c09a7a4995042a99fbba1e3598ac0 2.7.1
+a1fe0d15320c04f69d56f1b7dd31cff972a7b8df 2.7.2
--- a/README.md
+++ b/README.md
--- a/__init__.py
+++ b/__init__.py
+from metaphlan2 import metaphlan2
+from ._metaphlan2 import profile_single_fastq
+from ._metaphlan2 import profile_paired_fastq
+
+
+__author__ = metaphlan2.__author__
+__version__ = metaphlan2.__version__
+__date__ = metaphlan2.__date__
+
+__all__ = ['profile_single_fastq', 'profile_paired_fastq']
--- a/_metaphlan2.py
+++ b/_metaphlan2.py
+# Run MetaPhlAn2
+# Author: Francesco Asnicar
+# This module defines the functions which run MetaPhlAn2 on
+# single and paired fastq data.
+
+
+import subprocess as sb
+from q2_types.per_sample_sequences import SingleLanePerSampleSingleEndFastqDirFmt
+from q2_types.per_sample_sequences import SingleLanePerSamplePairedEndFastqDirFmt
+import tempfile
+import biom
+import os
+
+
+def metaphlan2_helper(raw_data, nproc, input_type, output_file, verbose=True):
+    cmd = ['metaphlan2.py', str(raw_data), '--input_type', str(input_type),
+           '--biom', str(output_file), '--nproc', str(nproc)]
+
+    if verbose:
+        print("\nRunning external command line application. This may print "
+              "messages to stdout and/or stderr.")
+        print("Command: {}".format(' '.join(cmd)), end='\n\n')
+
+    sb.run(cmd, check=True)
+
+
+def profile_single_fastq(raw_data: SingleLanePerSampleSingleEndFastqDirFmt,
+                         nproc: int=1) -> biom.Table:
+    output_biom = None
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        tmp_output_biom = os.path.join(tmp_dir, 'mp2_tmp_output.biom')
+        metaphlan2_helper(raw_data, nproc, 'multifastq', tmp_output_biom)
+        output_biom = biom.load_table(tmp_output_biom)
+
+    return output_biom
+
+
+def profile_paired_fastq(raw_data: SingleLanePerSamplePairedEndFastqDirFmt,
+                         nproc: int=1) -> biom.Table:
+    output_biom = None
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        tmp_output_biom = os.path.join(tmp_dir, 'mp2_tmp_output.biom')
+        metaphlan2_helper(raw_data, nproc, 'multifastq', tmp_output_biom)
+        output_biom = biom.load_table(tmp_output_biom)
+
+    return output_biom
--- a/db_v20/mpa_v20_m200.pkl
+++ b/db_v20/mpa_v20_m200.pkl
--- a/debian/changelog
+++ b/debian/changelog
+metaphlan2 (2.7.5-1) unstable; urgency=medium
+
+  * New upstream version (no data shipped with this archive any more)
+  * Standards-Version: 4.1.3
+  * debhelper 11
+  * db_v20/mpa_v20_m200.pkl was removed from upstream source so we cannot
+    install this file
+
+ -- Andreas Tille <tille@debian.org>  Fri, 16 Feb 2018 11:12:16 +0100
+
 metaphlan2 (2.6.0+ds-2) unstable; urgency=medium

  * Fix location of marker information file

--- a/debian/compat
+++ b/debian/compat
-10
+11
--- a/debian/control
+++ b/debian/control
@@ -3,12 +3,12 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
 Uploaders: Andreas Tille <tille@debian.org>
 Section: science
 Priority: optional
-Build-Depends: debhelper (>= 10),
+Build-Depends: debhelper (>= 11~),
               python-all,
               dh-python,
               pandoc,
               bowtie2
-Standards-Version: 3.9.8
+Standards-Version: 4.1.3
 Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/metaphlan2.git
 Vcs-Git: https://anonscm.debian.org/git/debian-med/metaphlan2.git
 Homepage: https://bitbucket.org/biobakery/metaphlan2

--- a/debian/copyright
+++ b/debian/copyright
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
 Upstream-Name: MetaPhlAn2
 Upstream-Contact:  Nicola Segata <nicola.segata@unitn.it>
 Source: https://bitbucket.org/biobakery/metaphlan2/downloads
-Files-Excluded: */*.bt2

 Files: *
-Copyright: 2012-2016 Duy Tin Truong, Nicola Segata and Curtis Huttenhower
+Copyright: 2012-2018 Duy Tin Truong, Nicola Segata and Curtis Huttenhower
 License: expat

 Files: debian/*
-Copyright: © 2016 Andreas Tille <tille@debian.org>
+Copyright: 2016-2018 Andreas Tille <tille@debian.org>
 License: expat

 License: expat

--- a/debian/install
+++ b/debian/install
 *.py		usr/share/metaphlan2
 utils		usr/share/metaphlan2
 debian/bin	usr
-db_v20		usr/share/metaphlan2
--- a/debian/patches/mpa_dir-is-usr_share_metaphlan2.patch
+++ b/debian/patches/mpa_dir-is-usr_share_metaphlan2.patch
@@ -7,16 +7,16 @@ Description: Instead of setting mpa_dir bash variable the path to the

 --- a/metaphlan2.py
 +++ b/metaphlan2.py
-@@ -385,7 +385,7 @@ def read_params(args):
-             
+@@ -417,7 +417,7 @@ def read_params(args):
+ 
             "*  You can also provide an externally BowTie2-mapped SAM if you specify this format with \n"
             "   --input_type. Two steps: first apply BowTie2 and then feed MetaPhlAn2 with the obtained sam:\n"
 -            "$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x ${mpa_dir}/db_v20/mpa_v20_m200 -U metagenome.fastq\n"
 +            "$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x /usr/share/metaphlan2/db_v20/mpa_v20_m200 -U metagenome.fastq\n"
             "$ metaphlan2.py metagenome.sam --input_type sam > profiled_metagenome.txt\n\n"
-             
+ 
             "*  Multiple alternative ways to pass the input are also available:\n"
-@@ -1107,7 +1107,7 @@ if __name__ == '__main__':
+@@ -1391,7 +1391,7 @@ def metaphlan2():
     # check for the mpa_pkl file
     if not os.path.isfile(pars['mpa_pkl']):
         sys.stderr.write("Error: Unable to find the mpa_pkl file at: " + pars['mpa_pkl'] +
@@ -24,25 +24,17 @@ Description: Instead of setting mpa_dir bash variable the path to the
 +                         "\nExpecting location /usr/share/metaphlan2/db_v20/mpa_v20_m200.pkl "
                          "\nSelect the file location with the option --mpa_pkl.\n"
                          "Exiting...\n\n")
-         sys.exit(1)           
-@@ -1155,7 +1155,7 @@ if __name__ == '__main__':
-             sys.stderr.write( "No MetaPhlAn BowTie2 database found "
-                               "[--bowtie2db option]! "
-                               "(or wrong path provided)."
-                              "\nExpecting location ${mpa_dir}/db_v20/map_v20_m200 "
-+                              "\nExpecting location /usr/share/metaphlan2/db_v20/mpa_v20_m200 "
-                               "\nExiting... " )
-             sys.exit(1)
- 
+         sys.exit(1)
 --- a/README.md
 +++ b/README.md
-@@ -60,32 +60,27 @@ Cloning the repository via the following
+@@ -82,33 +82,27 @@ Cloning the repository via the following
 
 This section presents some basic usages of MetaPhlAn2, for more advanced usages, please see at [its wiki](https://bitbucket.org/biobakery/biobakery/wiki/metaphlan2).
 
 -We assume here that ``metaphlan2.py`` is in the system path and that ``mpa_dir`` bash variable contains the main MetaPhlAn folder. You can set this two variables moving to your MetaPhlAn2 local folder and type:
+-
 -```
-#!cmd
+-#!bash
 -$ export PATH=`pwd`:$PATH
 -$ export mpa_dir=`pwd`
 -```
@@ -51,7 +43,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
 Here is the basic example to profile a metagenome from raw reads (requires BowTie2 in the system path with execution and read permissions, Perl installed). 
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py metagenome.fastq --input_type fastq > profiled_metagenome.txt
 +$ metaphlan2 metagenome.fastq --input_type fastq > profiled_metagenome.txt
 ```
@@ -59,7 +51,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
 It is highly recommended to save the intermediate BowTie2 output for re-running MetaPhlAn extremely quickly (--bowtie2out), and use multiple CPUs (--nproc) if available:
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py metagenome.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
 +$ metaphlan2 metagenome.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
 ```
@@ -67,15 +59,15 @@ Description: Instead of setting mpa_dir bash variable the path to the
 If you already mapped your metagenome against the marker DB (using a previous  MetaPhlAn run), you can obtain the results in few seconds by using the previously saved --bowtie2out file and specifying the input (--input_type bowtie2out):
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out > profiled_metagenome.txt
 +$ metaphlan2 metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out > profiled_metagenome.txt
 ```
 
 You can also provide an externally BowTie2-mapped SAM if you specify this format with --input_type. Two steps here: first map your metagenome with BowTie2 and then feed MetaPhlAn2 with the obtained sam:
-@@ -93,41 +88,41 @@ You can also provide an externally BowTi
+@@ -116,41 +110,41 @@ You can also provide an externally BowTi
 ```
- #!cmd
+ #!bash
 $ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x ${mpa_dir}/db_v20/mpa_v20_m200 -U metagenome.fastq
 -$ metaphlan2.py metagenome.sam --input_type sam > profiled_metagenome.txt
 +$ metaphlan2 metagenome.sam --input_type sam > profiled_metagenome.txt
@@ -84,31 +76,31 @@ Description: Instead of setting mpa_dir bash variable the path to the
 In order to make MetaPhlAn 2 easily compatible with complex metagenomic pipeline, there are now multiple alternative ways to pass the input:
 
 ```
- #!cmd
+ #!bash
 -$ cat metagenome.fastq | metaphlan2.py --input_type fastq > profiled_metagenome.txt
 +$ cat metagenome.fastq | metaphlan2 --input_type fastq > profiled_metagenome.txt
 ```
 
 ```
- #!cmd
+ #!bash
 -$ tar xjf metagenome.tar.bz2 --to-stdout | metaphlan2.py --input_type fastq --bowtie2db ${mpa_dir}/db_v20/mpa_v20_m200 > profiled_metagenome.txt
 +$ tar xjf metagenome.tar.bz2 --to-stdout | metaphlan2 --input_type fastq --bowtie2db ${mpa_dir}/db_v20/mpa_v20_m200 > profiled_metagenome.txt
 ```
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py --input_type fastq < metagenome.fastq > profiled_metagenome.txt
 +$ metaphlan2 --input_type fastq < metagenome.fastq > profiled_metagenome.txt
 ```
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py --input_type fastq <(bzcat metagenome.fastq.bz2) > profiled_metagenome.txt
 +$ metaphlan2 --input_type fastq <(bzcat metagenome.fastq.bz2) > profiled_metagenome.txt
 ```
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py --input_type fastq <(zcat metagenome_1.fastq.gz metagenome_2.fastq.gz) > profiled_metagenome.txt
 +$ metaphlan2 --input_type fastq <(zcat metagenome_1.fastq.gz metagenome_2.fastq.gz) > profiled_metagenome.txt
 ```
@@ -116,13 +108,13 @@ Description: Instead of setting mpa_dir bash variable the path to the
 MetaPhlAn 2 can also natively **handle paired-end metagenomes** (but does not use the paired-end information), and, more generally, metagenomes stored in multiple files (but you need to specify the --bowtie2out parameter):
 
 ```
- #!cmd
+ #!bash
 -$ metaphlan2.py metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
 +$ metaphlan2 metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
 ```
 
 For advanced options and other analysis types (such as strain tracking) please refer to the full command-line options.
-@@ -136,7 +131,7 @@ For advanced options and other analysis
+@@ -159,7 +153,7 @@ For advanced options and other analysis
 
 
 ```
@@ -131,7 +123,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
                      {fastq,fasta,multifasta,multifastq,bowtie2out,sam}
                      [--mpa_pkl MPA_PKL] [--bowtie2db METAPHLAN_BOWTIE2_DB]
                      [--bt2_ps BowTie2 presets] [--bowtie2_exe BOWTIE2_EXE]
-@@ -161,7 +156,7 @@ AUTHORS: Nicola Segata (nicola.segata@un
+@@ -184,7 +178,7 @@ AUTHORS: Nicola Segata (nicola.segata@un
 
 COMMON COMMANDS
 
@@ -140,7 +132,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
  main MetaPhlAn folder. Also BowTie2 should be in the system path with execution and read
  permissions, and Perl should be installed.
 
-@@ -172,32 +167,32 @@ strains in particular cases) present in
+@@ -195,32 +189,32 @@ strains in particular cases) present in
 relative abundance. This correspond to the default analysis type (--analysis_type rel_ab).
 
 *  Profiling a metagenome from raw reads:
@@ -184,7 +176,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
 
 ------------------------------------------------------------------- 
  
-@@ -215,23 +210,23 @@ file saved during the execution of the d
+@@ -238,23 +232,23 @@ file saved during the execution of the d
 *  The following command will output the abundance of each marker with a RPK (reads per kil-base) 
    higher 0.0. (we are assuming that metagenome_outfmt.bz2 has been generated before as 
    shown above).
@@ -212,16 +204,16 @@ Description: Instead of setting mpa_dir bash variable the path to the
    the optional argument --min_ab specifies the minimum clade abundance for reporting the markers
 
 ------------------------------------------------------------------- 
-@@ -536,7 +531,7 @@ pickle.dump(db, ofile, pickle.HIGHEST_PR
+@@ -551,7 +545,7 @@ pickle.dump(db, ofile, pickle.HIGHEST_PR
 ofile.close()
 ```
 
-* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db_v20 when running metaphlan2.py with option "--mpa_pkl".
-+* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db_v20 when running metaphlan2 with option "--mpa_pkl".
+-* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db\_v20 when running metaphlan2.py with option "--mpa\_pkl".
+* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db\_v20 when running metaphlan2 with option "--mpa\_pkl".
 
 
- ##**Metagenomic strain-level population genomics**##
-@@ -614,7 +609,7 @@ for f in $(ls fastqs/*.bz2)
+ ## Metagenomic strain-level population genomics
+@@ -621,7 +615,7 @@ for f in $(ls fastqs/*.bz2)
 do
     echo "Running metaphlan2 on ${f}"
     bn=$(basename ${f} | cut -d . -f 1)
@@ -230,10 +222,10 @@ Description: Instead of setting mpa_dir bash variable the path to the
 done
 ```
 
-@@ -761,4 +756,4 @@ In the output folder, you can find the f
+@@ -761,4 +755,4 @@ In the output folder, you can find the f
 1. clade_name.fasta: the alignment file of all metagenomic strains.
 3. *.marker_pos: this file shows the starting position of each marker in the strains.
 3. *.info: this file shows the general information like the total length of the concatenated markers (full sequence length), number of used markers, etc.
-4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage_of_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg_coverage" is the average coverage at all polymorphic sites.
+-4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage\_of\_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg\_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg\_coverage" is the average coverage at all polymorphic sites.
 \ No newline at end of file
-+4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage_of_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg_coverage" is the average coverage at all polymorphic sites.
+4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage\_of\_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg\_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg\_coverage" is the average coverage at all polymorphic sites.
--- a/debian/patches/spelling.patch
+++ b/debian/patches/spelling.patch
@@ -4,7 +4,7 @@ Description: Spelling

 --- a/README.md
 +++ b/README.md
-@@ -315,7 +315,7 @@ Post-mapping arguments:
+@@ -337,7 +337,7 @@ Post-mapping arguments:
 Additional analysis types and arguments:
   -t ANALYSIS TYPE      Type of analysis to perform: 
                          * rel_ab: profiling a metagenomes in terms of relative abundances
@@ -13,20 +13,20 @@ Description: Spelling
                          * reads_map: mapping from reads to clades (only reads hitting a marker)
                          * clade_profiles: normalized marker counts for clades with at least a non-null marker
                          * marker_ab_table: normalized marker counts (only when > 0.0 and normalized by metagenome size if --nreads is specified)
-@@ -744,7 +744,7 @@ python ../strainphlan.py -h
+@@ -743,7 +743,7 @@ python ../strainphlan.py -h
 The default setting can be stringent for some cases where you have very few samples left in the phylogenetic tree. You can relax some parameters to add more samples back:
 
- 1. *marker_in_clade*: In each sample, the clades with the percentage of present markers less than this threshold are removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
-2. *sample_in_marker*: If the percentage of samples that a marker present in is less than this threhold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
-+2. *sample_in_marker*: If the percentage of samples that a marker present in is less than this threshold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
- 3. *N_in_marker*: The consensus markers with the percentage of N nucleotides greater than this threshold are removed. Default "0.2". You can set this parameter to "0.5" to add some more samples.
- 4. *gap_in_sample*: The samples with full sequences concatenated from all markers and having the percentage of gaps greater than this threshold will be removed. Default 0.2. You can set this parameter to "0.5" to add some more samples.
- 5. *relaxed_parameters*: use this option to automatically set the above parameters to add some more samples by accepting some more gaps, Ns, etc. This option is equivalent to set: marker_in_clade=0.5, sample_in_marker=0.5,                        N_in_marker=0.5, gap_in_sample=0.5. Default "False".
+ 1. *marker\_in\_clade*: In each sample, the clades with the percentage of present markers less than this threshold are removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
+-2. *sample\_in\_marker*: If the percentage of samples that a marker present in is less than this threhold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
+2. *sample\_in\_marker*: If the percentage of samples that a marker present in is less than this threshold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
+ 3. *N\_in\_marker*: The consensus markers with the percentage of N nucleotides greater than this threshold are removed. Default "0.2". You can set this parameter to "0.5" to add some more samples.
+ 4. *gap\_in\_sample*: The samples with full sequences concatenated from all markers and having the percentage of gaps greater than this threshold will be removed. Default 0.2. You can set this parameter to "0.5" to add some more samples.
+ 5. *relaxed\_parameters*: use this option to automatically set the above parameters to add some more samples by accepting some more gaps, Ns, etc. This option is equivalent to set: marker\_in\_clade=0.5, sample\_in\_marker=0.5, N\_in\_marker=0.5, gap\_in\_sample=0.5. Default "False".
 --- a/strainphlan.py
 +++ b/strainphlan.py
 @@ -328,7 +328,7 @@ def read_params():
-         required=False, 
-         default=['all'], 
+         required=False,
+         default=['all'],
         type=str,
 -        help='The clades (space seperated) for which the script will compute '\
 +        help='The clades (space separated) for which the script will compute '\
@@ -35,8 +35,8 @@ Description: Spelling
                 'file where each clade name is on a line will be read.'
 --- a/metaphlan2.py
 +++ b/metaphlan2.py
-@@ -555,7 +555,7 @@ def read_params(args):
-          default='rel_ab', help = 
+@@ -597,7 +597,7 @@ def read_params(args):
+          default='rel_ab', help =
          "Type of analysis to perform: \n"
          " * rel_ab: profiling a metagenomes in terms of relative abundances\n"
 -         " * rel_ab_w_read_stats: profiling a metagenomes in terms of relative abundances and estimate the number of reads comming from each clade.\n"

--- a/debian/watch
+++ b/debian/watch
 version=4

-opts="repacksuffix=+ds,dversionmangle=s/\+ds//g,repack,compression=xz" \
-  https://bitbucket.org/biobakery/metaphlan2/downloads?tab=tags .*/(\d[\d.]+)\.tar\.bz2
+#opts="repacksuffix=+ds,dversionmangle=s/\+ds//g,repack,compression=xz" \
+https://bitbucket.org/biobakery/metaphlan2/downloads?tab=tags .*/(\d[\d.]+)\.tar\.bz2
--- a/metaphlan2.py
+++ b/metaphlan2.py
--- a/plugin_setup.py
+++ b/plugin_setup.py
+# MetaPhlAn2 Plugin
+# Author: Francesco Asnicar
+# This module creates the QIIME2 plugin instance for MetaPhlAn2 and
+# registers functions for profiling single and paired fastq files.
+
+
+from qiime2.plugin import Plugin, Int
+from q2_types.sample_data import SampleData
+from q2_types.per_sample_sequences import SequencesWithQuality
+from q2_types.per_sample_sequences import PairedEndSequencesWithQuality
+from q2_types.feature_table import FeatureTable
+from q2_types.feature_table import Frequency
+import metaphlan2
+
+
+plugin = Plugin(
+    name='metaphlan2',
+    version='2.7.5',
+    website='http://segatalab.cibio.unitn.it/tools/metaphlan2/',
+    user_support_text='metaphlan-users@googlegroups.com',
+    package='metaphlan2',
+    citation_text=('Truong DT, Franzosa EA, Tickle TL, Scholz M, Weingart G, '
+                   'Pasolli E, Tett A, Huttenhower C, Segata N. MetaPhlAn2 '
+                   'for enhanced metagenomic taxonomic profiling. Nature '
+                   'Methods. 2015 Oct 1;12(10):902-3'),
+    description=('MetaPhlAn is a computational tool for profiling the '
+                 'composition of microbial communities (Bacteria, Archaea, '
+                 'Eukaryotes, and Viruses) from metagenomic shotgun '
+                 'sequencing data with species level resolution'),
+    short_description='MetaPhlAn2 for enhanced metagenomic taxonomic profiling'
+)
+
+plugin.methods.register_function(
+    function=metaphlan2._metaphlan2.profile_single_fastq,
+
+    inputs={'raw_data': SampleData[SequencesWithQuality]},
+    input_descriptions={'raw_data': ('metagenomic shotgun sequencing data')},
+
+    parameters={'nproc': Int},
+    parameter_descriptions={'nproc': ('The number of CPUs to use for '
+                                      'parallelizing the mapping, default 1 '
+                                      '(no parallelization)')},
+
+    outputs=[('biom_table', FeatureTable[Frequency])],
+    output_descriptions={'biom_table': ('Table relative abundances of the '
+                                        'species found in the input')},
+
+    name='MetaPhlAn2 taxonomic profiling',
+    description=(('MetaPhlAn is a computational tool for profiling the '
+                  'composition of microbial communities (Bacteria, Archaea, '
+                  'Eukaryotes, and Viruses) from metagenomic shotgun '
+                  'sequencing data with species level resolution'))
+)
+
+plugin.methods.register_function(
+    function=metaphlan2._metaphlan2.profile_paired_fastq,
+
+    inputs={'raw_data': SampleData[PairedEndSequencesWithQuality]},
+    input_descriptions={'raw_data': ('metagenomic shotgun sequencing data')},
+
+    parameters={'nproc': Int},
+    parameter_descriptions={'nproc': 'The number of CPUs to use for '
+                                     'parallelizing the mapping, default 1 '
+                                     '(no parallelization)'},
+
+    outputs=[('biom_table', FeatureTable[Frequency])],
+    output_descriptions={'biom_table': ('TAB-separated text file containing '
+                                        'relative abundances of the species '
+                                        'found in the input')},
+
+    name='MetaPhlAn2 taxonomic profiling',
+    description=('MetaPhlAn is a computational tool for profiling the '
+                 'composition of microbial communities (Bacteria, Archaea, '
+                 'Eukaryotes, and Viruses) from metagenomic shotgun '
+                 'sequencing data with species level resolution')
+)
--- a/strainphlan.py
+++ b/strainphlan.py
--- a/strainphlan_src/add_metadata_tree.py
+++ b/strainphlan_src/add_metadata_tree.py
@@ -11,6 +11,7 @@ import copy
 import ConfigParser
 import dendropy
 import numpy
+import ipdb


 def read_params():
@@ -70,6 +71,7 @@ def main(args):
        tree = dendropy.Tree(stream=open(ifn_tree, 'r'), schema='newick')
        for node in tree.leaf_nodes():
            sample = node.get_node_str().strip("'")
+            sample = sample.replace(' ', '_')
            sample = sample.replace(args['string_to_remove'], '')
            prefixes = [prefix for prefix in 
                            ['k__', 'p__', 'c__', 'o__',