Skip to content
Commits on Source (9)
repo: b4e7c5505112b08d33dd30f4788429ba023e67f0
node: c43e40a443edbd3c4cac7349d2679540578096f5
repo: 092c2fe2278cb7f0b18d81faeb4aab98b89dc096
node: b2f9b3286d4be376805e3b5c26cf141ed375c605
branch: default
tag: 2.6.0
tag: 2.7.5
syntax: glob
databases/
*.pyc
build/
dist/
*.egg-info/
f8823b8162ddea6533866afd27d5ed1ce6ff22e0 utils/export2graphlan
0d8cb18ce9996e7ce4043a00294aeb2ed9bfa5f2 utils/hclust2
c168a100f37e23e2c110849a8d91fac8da49f5bd utils/export2graphlan
35dfd725e7f024fc6d0edef0cc191c7963108787 utils/hclust2
b4e7c5505112b08d33dd30f4788429ba023e67f0 2.0_alpha1
60d254d499e2dd1a8b1cfe344236efa47f823ec6 2.0_beta1
1b6df65b5a3e9feed0179f855c11fd197fe9a64f 2.0_beta2
12cceaad3493085c4497898aaeff691913ddb633 2.0_beta3
616a7debe7937672940130e6c5b26a9ef9e76fcd 2.0.0
3959b668bbed6150698b594cbbc30a924e5d30e1 2.1.0
0ef29ae841f52b53176ca264fb9f52f98713eb3c 2.2.0
5424bb911dfcdb7212ea0949d4faeb6e69cfa61f 2.3.0
6f2a1673af8565e93fb8e69238141889b7c87361 2.5.0
092c2fe2278cb7f0b18d81faeb4aab98b89dc096 2.0_alpha1
c5c90e145ff40fb2fc3827651d572aa9a724ba31 2.0_beta1
7168beb9750d223736888cefa387252d019f6a10 2.0_beta2
46a5e65865233da6d05ebded2700f1854bab9878 2.0_beta3
9943159669e10c4943c1d3440866e93ce536617a 2.0.0
56fbf765ffaa2b851c0bbd17f26083a6710a404e 2.1.0
97d782790746b6a1124fac7cadeca7bfa9f797cf 2.2.0
f3325ec17035523cf9f2ea7736afcc119bd94a89 2.3.0
e424931b4d94d50cf62381c79c335935415b33b9 2.5.0
6d6433aa0f6856bff2e84a757b4084736bc3738f 2.6.0
8963e486f79043c79a299f7a684e4550b0115c32 2.7.0
d8ab9ca4244c09a7a4995042a99fbba1e3598ac0 2.7.1
a1fe0d15320c04f69d56f1b7dd31cff972a7b8df 2.7.2
This diff is collapsed.
from metaphlan2 import metaphlan2
from ._metaphlan2 import profile_single_fastq
from ._metaphlan2 import profile_paired_fastq
# Re-export the metadata attributes defined by the metaphlan2 module so they
# are available directly on the package.
__author__ = metaphlan2.__author__
__version__ = metaphlan2.__version__
__date__ = metaphlan2.__date__
# Names exported by `from <package> import *`: the two profiling functions
# imported from ._metaphlan2 above.
__all__ = ['profile_single_fastq', 'profile_paired_fastq']
# Run MetaPhlAn2
# Author: Francesco Asnicar
# This module defines the functions which run MetaPhlAn2 on
# single and paired fastq data.
import subprocess as sb
from q2_types.per_sample_sequences import SingleLanePerSampleSingleEndFastqDirFmt
from q2_types.per_sample_sequences import SingleLanePerSamplePairedEndFastqDirFmt
import tempfile
import biom
import os
def metaphlan2_helper(raw_data, nproc, input_type, output_file, verbose=True):
    """Invoke the ``metaphlan2.py`` executable and wait for it to finish.

    Every argument is converted with ``str()`` before being placed on the
    command line.

    Parameters
    ----------
    raw_data
        Positional input argument handed to ``metaphlan2.py``.
    nproc
        Value passed to ``--nproc``.
    input_type
        Value passed to ``--input_type``.
    output_file
        Value passed to ``--biom``.
    verbose : bool, optional
        When true, print a notice and the full command before running it.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status (``check=True``).
    """
    argv = ['metaphlan2.py', str(raw_data)]
    argv += ['--input_type', str(input_type)]
    argv += ['--biom', str(output_file)]
    argv += ['--nproc', str(nproc)]

    if verbose:
        notice = ("\nRunning external command line application. This may print "
                  "messages to stdout and/or stderr.")
        print(notice)
        print("Command: " + ' '.join(argv), end='\n\n')

    # The argument list is passed as-is (no shell involved).
    sb.run(argv, check=True)
def profile_single_fastq(raw_data: SingleLanePerSampleSingleEndFastqDirFmt,
                         nproc: int = 1) -> biom.Table:
    """Profile single-end fastq data with MetaPhlAn2.

    Runs ``metaphlan2_helper`` with input type ``'multifastq'``, letting it
    write its BIOM output into a temporary directory, then loads that file
    with ``biom.load_table``.

    Parameters
    ----------
    raw_data
        Input handed to ``metaphlan2_helper``.
    nproc : int, optional
        Forwarded to ``metaphlan2_helper``; defaults to 1.

    Returns
    -------
    biom.Table
        The table loaded from the generated BIOM file.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        biom_path = os.path.join(scratch_dir, 'mp2_tmp_output.biom')
        metaphlan2_helper(raw_data, nproc, 'multifastq', biom_path)
        # Load before leaving the ``with`` block: the directory (and the
        # file in it) is removed on exit.
        return biom.load_table(biom_path)
def profile_paired_fastq(raw_data: SingleLanePerSamplePairedEndFastqDirFmt,
                         nproc: int = 1) -> biom.Table:
    """Profile paired-end fastq data with MetaPhlAn2.

    Runs ``metaphlan2_helper`` with input type ``'multifastq'``, letting it
    write its BIOM output into a temporary directory, then loads that file
    with ``biom.load_table``.

    Parameters
    ----------
    raw_data
        Input handed to ``metaphlan2_helper``.
    nproc : int, optional
        Forwarded to ``metaphlan2_helper``; defaults to 1.

    Returns
    -------
    biom.Table
        The table loaded from the generated BIOM file.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        biom_path = os.path.join(scratch_dir, 'mp2_tmp_output.biom')
        metaphlan2_helper(raw_data, nproc, 'multifastq', biom_path)
        # Load before leaving the ``with`` block: the directory (and the
        # file in it) is removed on exit.
        return biom.load_table(biom_path)
metaphlan2 (2.7.5-1) unstable; urgency=medium
* New upstream version (no data shipped with this archive any more)
* Standards-Version: 4.1.3
* debhelper 11
* db_v20/mpa_v20_m200.pkl was removed from upstream source so we can not
install this file
-- Andreas Tille <tille@debian.org> Fri, 16 Feb 2018 11:12:16 +0100
metaphlan2 (2.6.0+ds-2) unstable; urgency=medium
* Fix location of marker information file
......
......@@ -3,12 +3,12 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11~),
python-all,
dh-python,
pandoc,
bowtie2
Standards-Version: 3.9.8
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/metaphlan2.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/metaphlan2.git
Homepage: https://bitbucket.org/biobakery/metaphlan2
......
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: MetaPhlAn2
Upstream-Contact: Nicola Segata <nicola.segata@unitn.it>
Source: https://bitbucket.org/biobakery/metaphlan2/downloads
Files-Excluded: */*.bt2
Files: *
Copyright: 2012-2016 Duy Tin Truong, Nicola Segata and Curtis Huttenhower
Copyright: 2012-2018 Duy Tin Truong, Nicola Segata and Curtis Huttenhower
License: expat
Files: debian/*
Copyright: © 2016 Andreas Tille <tille@debian.org>
Copyright: 2016-2018 Andreas Tille <tille@debian.org>
License: expat
License: expat
......
*.py usr/share/metaphlan2
utils usr/share/metaphlan2
debian/bin usr
db_v20 usr/share/metaphlan2
......@@ -7,16 +7,16 @@ Description: Instead of setting mpa_dir bash variable the path to the
--- a/metaphlan2.py
+++ b/metaphlan2.py
@@ -385,7 +385,7 @@ def read_params(args):
@@ -417,7 +417,7 @@ def read_params(args):
"* You can also provide an externally BowTie2-mapped SAM if you specify this format with \n"
" --input_type. Two steps: first apply BowTie2 and then feed MetaPhlAn2 with the obtained sam:\n"
- "$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x ${mpa_dir}/db_v20/mpa_v20_m200 -U metagenome.fastq\n"
+ "$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x /usr/share/metaphlan2/db_v20/mpa_v20_m200 -U metagenome.fastq\n"
"$ metaphlan2.py metagenome.sam --input_type sam > profiled_metagenome.txt\n\n"
"* Multiple alternative ways to pass the input are also available:\n"
@@ -1107,7 +1107,7 @@ if __name__ == '__main__':
@@ -1391,7 +1391,7 @@ def metaphlan2():
# check for the mpa_pkl file
if not os.path.isfile(pars['mpa_pkl']):
sys.stderr.write("Error: Unable to find the mpa_pkl file at: " + pars['mpa_pkl'] +
......@@ -24,25 +24,17 @@ Description: Instead of setting mpa_dir bash variable the path to the
+ "\nExpecting location /usr/share/metaphlan2/db_v20/mpa_v20_m200.pkl "
"\nSelect the file location with the option --mpa_pkl.\n"
"Exiting...\n\n")
sys.exit(1)
@@ -1155,7 +1155,7 @@ if __name__ == '__main__':
sys.stderr.write( "No MetaPhlAn BowTie2 database found "
"[--bowtie2db option]! "
"(or wrong path provided)."
- "\nExpecting location ${mpa_dir}/db_v20/map_v20_m200 "
+ "\nExpecting location /usr/share/metaphlan2/db_v20/mpa_v20_m200 "
"\nExiting... " )
sys.exit(1)
sys.exit(1)
--- a/README.md
+++ b/README.md
@@ -60,32 +60,27 @@ Cloning the repository via the following
@@ -82,33 +82,27 @@ Cloning the repository via the following
This section presents some basic usages of MetaPhlAn2, for more advanced usages, please see at [its wiki](https://bitbucket.org/biobakery/biobakery/wiki/metaphlan2).
-We assume here that ``metaphlan2.py`` is in the system path and that ``mpa_dir`` bash variable contains the main MetaPhlAn folder. You can set this two variables moving to your MetaPhlAn2 local folder and type:
-
-```
-#!cmd
-#!bash
-$ export PATH=`pwd`:$PATH
-$ export mpa_dir=`pwd`
-```
......@@ -51,7 +43,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
Here is the basic example to profile a metagenome from raw reads (requires BowTie2 in the system path with execution and read permissions, Perl installed).
```
#!cmd
#!bash
-$ metaphlan2.py metagenome.fastq --input_type fastq > profiled_metagenome.txt
+$ metaphlan2 metagenome.fastq --input_type fastq > profiled_metagenome.txt
```
......@@ -59,7 +51,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
It is highly recommended to save the intermediate BowTie2 output for re-running MetaPhlAn extremely quickly (--bowtie2out), and use multiple CPUs (--nproc) if available:
```
#!cmd
#!bash
-$ metaphlan2.py metagenome.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
+$ metaphlan2 metagenome.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
```
......@@ -67,15 +59,15 @@ Description: Instead of setting mpa_dir bash variable the path to the
If you already mapped your metagenome against the marker DB (using a previous MetaPhlAn run), you can obtain the results in few seconds by using the previously saved --bowtie2out file and specifying the input (--input_type bowtie2out):
```
#!cmd
#!bash
-$ metaphlan2.py metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out > profiled_metagenome.txt
+$ metaphlan2 metagenome.bowtie2.bz2 --nproc 5 --input_type bowtie2out > profiled_metagenome.txt
```
You can also provide an externally BowTie2-mapped SAM if you specify this format with --input_type. Two steps here: first map your metagenome with BowTie2 and then feed MetaPhlAn2 with the obtained sam:
@@ -93,41 +88,41 @@ You can also provide an externally BowTi
@@ -116,41 +110,41 @@ You can also provide an externally BowTi
```
#!cmd
#!bash
$ bowtie2 --sam-no-hd --sam-no-sq --no-unal --very-sensitive -S metagenome.sam -x ${mpa_dir}/db_v20/mpa_v20_m200 -U metagenome.fastq
-$ metaphlan2.py metagenome.sam --input_type sam > profiled_metagenome.txt
+$ metaphlan2 metagenome.sam --input_type sam > profiled_metagenome.txt
......@@ -84,31 +76,31 @@ Description: Instead of setting mpa_dir bash variable the path to the
In order to make MetaPhlAn 2 easily compatible with complex metagenomic pipeline, there are now multiple alternative ways to pass the input:
```
#!cmd
#!bash
-$ cat metagenome.fastq | metaphlan2.py --input_type fastq > profiled_metagenome.txt
+$ cat metagenome.fastq | metaphlan2 --input_type fastq > profiled_metagenome.txt
```
```
#!cmd
#!bash
-$ tar xjf metagenome.tar.bz2 --to-stdout | metaphlan2.py --input_type fastq --bowtie2db ${mpa_dir}/db_v20/mpa_v20_m200 > profiled_metagenome.txt
+$ tar xjf metagenome.tar.bz2 --to-stdout | metaphlan2 --input_type fastq --bowtie2db ${mpa_dir}/db_v20/mpa_v20_m200 > profiled_metagenome.txt
```
```
#!cmd
#!bash
-$ metaphlan2.py --input_type fastq < metagenome.fastq > profiled_metagenome.txt
+$ metaphlan2 --input_type fastq < metagenome.fastq > profiled_metagenome.txt
```
```
#!cmd
#!bash
-$ metaphlan2.py --input_type fastq <(bzcat metagenome.fastq.bz2) > profiled_metagenome.txt
+$ metaphlan2 --input_type fastq <(bzcat metagenome.fastq.bz2) > profiled_metagenome.txt
```
```
#!cmd
#!bash
-$ metaphlan2.py --input_type fastq <(zcat metagenome_1.fastq.gz metagenome_2.fastq.gz) > profiled_metagenome.txt
+$ metaphlan2 --input_type fastq <(zcat metagenome_1.fastq.gz metagenome_2.fastq.gz) > profiled_metagenome.txt
```
......@@ -116,13 +108,13 @@ Description: Instead of setting mpa_dir bash variable the path to the
MetaPhlAn 2 can also natively **handle paired-end metagenomes** (but does not use the paired-end information), and, more generally, metagenomes stored in multiple files (but you need to specify the --bowtie2out parameter):
```
#!cmd
#!bash
-$ metaphlan2.py metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
+$ metaphlan2 metagenome_1.fastq,metagenome_2.fastq --bowtie2out metagenome.bowtie2.bz2 --nproc 5 --input_type fastq > profiled_metagenome.txt
```
For advanced options and other analysis types (such as strain tracking) please refer to the full command-line options.
@@ -136,7 +131,7 @@ For advanced options and other analysis
@@ -159,7 +153,7 @@ For advanced options and other analysis
```
......@@ -131,7 +123,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
{fastq,fasta,multifasta,multifastq,bowtie2out,sam}
[--mpa_pkl MPA_PKL] [--bowtie2db METAPHLAN_BOWTIE2_DB]
[--bt2_ps BowTie2 presets] [--bowtie2_exe BOWTIE2_EXE]
@@ -161,7 +156,7 @@ AUTHORS: Nicola Segata (nicola.segata@un
@@ -184,7 +178,7 @@ AUTHORS: Nicola Segata (nicola.segata@un
COMMON COMMANDS
......@@ -140,7 +132,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
main MetaPhlAn folder. Also BowTie2 should be in the system path with execution and read
permissions, and Perl should be installed.
@@ -172,32 +167,32 @@ strains in particular cases) present in
@@ -195,32 +189,32 @@ strains in particular cases) present in
relative abundance. This correspond to the default analysis type (--analysis_type rel_ab).
* Profiling a metagenome from raw reads:
......@@ -184,7 +176,7 @@ Description: Instead of setting mpa_dir bash variable the path to the
-------------------------------------------------------------------
@@ -215,23 +210,23 @@ file saved during the execution of the d
@@ -238,23 +232,23 @@ file saved during the execution of the d
* The following command will output the abundance of each marker with a RPK (reads per kil-base)
higher 0.0. (we are assuming that metagenome_outfmt.bz2 has been generated before as
shown above).
......@@ -212,16 +204,16 @@ Description: Instead of setting mpa_dir bash variable the path to the
the optional argument --min_ab specifies the minimum clade abundance for reporting the markers
-------------------------------------------------------------------
@@ -536,7 +531,7 @@ pickle.dump(db, ofile, pickle.HIGHEST_PR
@@ -551,7 +545,7 @@ pickle.dump(db, ofile, pickle.HIGHEST_PR
ofile.close()
```
-* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db_v20 when running metaphlan2.py with option "--mpa_pkl".
+* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db_v20 when running metaphlan2 with option "--mpa_pkl".
-* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db\_v20 when running metaphlan2.py with option "--mpa\_pkl".
+* To use the new database, switch to metaphlan2/db_v21 instead of metaphlan2/db\_v20 when running metaphlan2 with option "--mpa\_pkl".
##**Metagenomic strain-level population genomics**##
@@ -614,7 +609,7 @@ for f in $(ls fastqs/*.bz2)
## Metagenomic strain-level population genomics
@@ -621,7 +615,7 @@ for f in $(ls fastqs/*.bz2)
do
echo "Running metaphlan2 on ${f}"
bn=$(basename ${f} | cut -d . -f 1)
......@@ -230,10 +222,10 @@ Description: Instead of setting mpa_dir bash variable the path to the
done
```
@@ -761,4 +756,4 @@ In the output folder, you can find the f
@@ -761,4 +755,4 @@ In the output folder, you can find the f
1. clade_name.fasta: the alignment file of all metagenomic strains.
3. *.marker_pos: this file shows the starting position of each marker in the strains.
3. *.info: this file shows the general information like the total length of the concatenated markers (full sequence length), number of used markers, etc.
-4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage_of_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg_coverage" is the average coverage at all polymorphic sites.
-4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage\_of\_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg\_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg\_coverage" is the average coverage at all polymorphic sites.
\ No newline at end of file
+4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage_of_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg_coverage" is the average coverage at all polymorphic sites.
+4. *.polymorphic: this file shows the statistics on the polymorphic site, where "sample" is the sample name, "percentage\_of\_polymorphic_sites" is the percentage of sites that are suspected to be polymorphic, "avg\_freq" is the average frequency of the dominant alleles on all polymorphic sites, "avg\_coverage" is the average coverage at all polymorphic sites.
......@@ -4,7 +4,7 @@ Description: Spelling
--- a/README.md
+++ b/README.md
@@ -315,7 +315,7 @@ Post-mapping arguments:
@@ -337,7 +337,7 @@ Post-mapping arguments:
Additional analysis types and arguments:
-t ANALYSIS TYPE Type of analysis to perform:
* rel_ab: profiling a metagenomes in terms of relative abundances
......@@ -13,20 +13,20 @@ Description: Spelling
* reads_map: mapping from reads to clades (only reads hitting a marker)
* clade_profiles: normalized marker counts for clades with at least a non-null marker
* marker_ab_table: normalized marker counts (only when > 0.0 and normalized by metagenome size if --nreads is specified)
@@ -744,7 +744,7 @@ python ../strainphlan.py -h
@@ -743,7 +743,7 @@ python ../strainphlan.py -h
The default setting can be stringent for some cases where you have very few samples left in the phylogenetic tree. You can relax some parameters to add more samples back:
1. *marker_in_clade*: In each sample, the clades with the percentage of present markers less than this threshold are removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
-2. *sample_in_marker*: If the percentage of samples that a marker present in is less than this threhold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
+2. *sample_in_marker*: If the percentage of samples that a marker present in is less than this threshold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
3. *N_in_marker*: The consensus markers with the percentage of N nucleotides greater than this threshold are removed. Default "0.2". You can set this parameter to "0.5" to add some more samples.
4. *gap_in_sample*: The samples with full sequences concatenated from all markers and having the percentage of gaps greater than this threshold will be removed. Default 0.2. You can set this parameter to "0.5" to add some more samples.
5. *relaxed_parameters*: use this option to automatically set the above parameters to add some more samples by accepting some more gaps, Ns, etc. This option is equivalent to set: marker_in_clade=0.5, sample_in_marker=0.5, N_in_marker=0.5, gap_in_sample=0.5. Default "False".
1. *marker\_in\_clade*: In each sample, the clades with the percentage of present markers less than this threshold are removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
-2. *sample\_in\_marker*: If the percentage of samples that a marker present in is less than this threhold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
+2. *sample\_in\_marker*: If the percentage of samples that a marker present in is less than this threshold, that marker is removed. Default "0.8". You can set this parameter to "0.5" to add some more samples.
3. *N\_in\_marker*: The consensus markers with the percentage of N nucleotides greater than this threshold are removed. Default "0.2". You can set this parameter to "0.5" to add some more samples.
4. *gap\_in\_sample*: The samples with full sequences concatenated from all markers and having the percentage of gaps greater than this threshold will be removed. Default 0.2. You can set this parameter to "0.5" to add some more samples.
5. *relaxed\_parameters*: use this option to automatically set the above parameters to add some more samples by accepting some more gaps, Ns, etc. This option is equivalent to set: marker\_in\_clade=0.5, sample\_in\_marker=0.5, N\_in\_marker=0.5, gap\_in\_sample=0.5. Default "False".
--- a/strainphlan.py
+++ b/strainphlan.py
@@ -328,7 +328,7 @@ def read_params():
required=False,
default=['all'],
required=False,
default=['all'],
type=str,
- help='The clades (space seperated) for which the script will compute '\
+ help='The clades (space separated) for which the script will compute '\
......@@ -35,8 +35,8 @@ Description: Spelling
'file where each clade name is on a line will be read.'
--- a/metaphlan2.py
+++ b/metaphlan2.py
@@ -555,7 +555,7 @@ def read_params(args):
default='rel_ab', help =
@@ -597,7 +597,7 @@ def read_params(args):
default='rel_ab', help =
"Type of analysis to perform: \n"
" * rel_ab: profiling a metagenomes in terms of relative abundances\n"
- " * rel_ab_w_read_stats: profiling a metagenomes in terms of relative abundances and estimate the number of reads comming from each clade.\n"
......
version=4
opts="repacksuffix=+ds,dversionmangle=s/\+ds//g,repack,compression=xz" \
https://bitbucket.org/biobakery/metaphlan2/downloads?tab=tags .*/(\d[\d.]+)\.tar\.bz2
#opts="repacksuffix=+ds,dversionmangle=s/\+ds//g,repack,compression=xz" \
https://bitbucket.org/biobakery/metaphlan2/downloads?tab=tags .*/(\d[\d.]+)\.tar\.bz2
This diff is collapsed.
# MetaPhlAn2 Plugin
# Author: Francesco Asnicar
# This module creates the QIIME2 plugin instance for MetaPhlAn2 and
# registers functions for profiling single and paired fastq files.
from qiime2.plugin import Plugin, Int
from q2_types.sample_data import SampleData
from q2_types.per_sample_sequences import SequencesWithQuality
from q2_types.per_sample_sequences import PairedEndSequencesWithQuality
from q2_types.feature_table import FeatureTable
from q2_types.feature_table import Frequency
import metaphlan2
# QIIME 2 plugin instance for MetaPhlAn2; the profiling methods are
# registered on this object.
plugin = Plugin(
    name='metaphlan2',
    version='2.7.5',
    package='metaphlan2',
    website='http://segatalab.cibio.unitn.it/tools/metaphlan2/',
    user_support_text='metaphlan-users@googlegroups.com',
    short_description=(
        'MetaPhlAn2 for enhanced metagenomic taxonomic profiling'
    ),
    description=(
        'MetaPhlAn is a computational tool for profiling the composition of '
        'microbial communities (Bacteria, Archaea, Eukaryotes, and Viruses) '
        'from metagenomic shotgun sequencing data with species level '
        'resolution'
    ),
    citation_text=(
        'Truong DT, Franzosa EA, Tickle TL, Scholz M, Weingart G, Pasolli E, '
        'Tett A, Huttenhower C, Segata N. MetaPhlAn2 for enhanced '
        'metagenomic taxonomic profiling. Nature Methods. '
        '2015 Oct 1;12(10):902-3'
    ),
)
# Register the single-end profiling function as a method of the plugin.
plugin.methods.register_function(
    function=metaphlan2._metaphlan2.profile_single_fastq,
    name='MetaPhlAn2 taxonomic profiling',
    description=(
        'MetaPhlAn is a computational tool for profiling the composition of '
        'microbial communities (Bacteria, Archaea, Eukaryotes, and Viruses) '
        'from metagenomic shotgun sequencing data with species level '
        'resolution'
    ),
    inputs={'raw_data': SampleData[SequencesWithQuality]},
    input_descriptions={'raw_data': 'metagenomic shotgun sequencing data'},
    parameters={'nproc': Int},
    parameter_descriptions={
        'nproc': (
            'The number of CPUs to use for parallelizing the mapping, '
            'default 1 (no parallelization)'
        ),
    },
    outputs=[('biom_table', FeatureTable[Frequency])],
    output_descriptions={
        'biom_table': (
            'Table relative abundances of the species found in the input'
        ),
    },
)
# Register the paired-end profiling function as a method of the plugin.
plugin.methods.register_function(
    function=metaphlan2._metaphlan2.profile_paired_fastq,
    inputs={'raw_data': SampleData[PairedEndSequencesWithQuality]},
    input_descriptions={'raw_data': 'metagenomic shotgun sequencing data'},
    parameters={'nproc': Int},
    parameter_descriptions={
        'nproc': ('The number of CPUs to use for parallelizing the mapping, '
                  'default 1 (no parallelization)'),
    },
    # NOTE(review): the description speaks of relative abundances while the
    # declared semantic type is Frequency -- confirm which one is intended.
    outputs=[('biom_table', FeatureTable[Frequency])],
    # The output is a feature table (declared just above), not a
    # TAB-separated text file, so the description says so.
    output_descriptions={
        'biom_table': ('BIOM table containing relative abundances of the '
                       'species found in the input'),
    },
    name='MetaPhlAn2 taxonomic profiling',
    description=('MetaPhlAn is a computational tool for profiling the '
                 'composition of microbial communities (Bacteria, Archaea, '
                 'Eukaryotes, and Viruses) from metagenomic shotgun '
                 'sequencing data with species level resolution'),
)
This diff is collapsed.
......@@ -11,6 +11,7 @@ import copy
import ConfigParser
import dendropy
import numpy
import ipdb
def read_params():
......@@ -70,6 +71,7 @@ def main(args):
tree = dendropy.Tree(stream=open(ifn_tree, 'r'), schema='newick')
for node in tree.leaf_nodes():
sample = node.get_node_str().strip("'")
sample = sample.replace(' ', '_')
sample = sample.replace(args['string_to_remove'], '')
prefixes = [prefix for prefix in
['k__', 'p__', 'c__', 'o__',
......