Skip to content
Commits on Source (3)
q2-demux (2019.10.0-1) UNRELEASED; urgency=medium
* New upstream version
-- Liubov Chuprikova <chuprikovalv@gmail.com> Sun, 29 Dec 2019 18:12:25 +0300
q2-demux (2019.4.1-1) unstable; urgency=medium
* Initial release (Closes: #930152)
......
......@@ -9,6 +9,7 @@
from ._demux import emp_single, emp_paired
from ._subsample import subsample_single, subsample_paired
from ._summarize import summarize
from ._filter import filter_samples
from ._version import get_versions
......@@ -16,4 +17,4 @@ __version__ = get_versions()['version']
del get_versions
# Names exported as the public API of the package; `filter_samples` is
# imported from `._filter` above.
__all__ = ['emp_single', 'emp_paired', 'summarize',
           'subsample_single', 'subsample_paired', 'filter_samples']
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2019, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os
import pandas as pd
from qiime2 import Metadata
from qiime2.util import duplicate
from q2_types.per_sample_sequences import \
CasavaOneEightSingleLanePerSampleDirFmt
from ._summarize import _PlotQualView
def filter_samples(demux: _PlotQualView, metadata: Metadata,
                   where: str = None, exclude_ids: bool = False) \
                   -> CasavaOneEightSingleLanePerSampleDirFmt:
    """Copy a metadata-selected subset of demultiplexed samples.

    Parameters
    ----------
    demux
        View of the demultiplexed data. Its ``paired`` flag and the manifest
        of its ``directory_format`` are read.
    metadata
        Sample metadata whose ids select the samples.
    where
        Optional clause forwarded to ``metadata.get_ids`` to narrow the
        selected ids.
    exclude_ids
        If True, keep every sample listed in the manifest *except* the
        selected ids.

    Returns
    -------
    CasavaOneEightSingleLanePerSampleDirFmt
        A new directory format containing the retained read files.

    Raises
    ------
    ValueError
        If the metadata (after applying ``where``) selects no ids, or if an
        id to keep is not present in the manifest.
    """
    results = CasavaOneEightSingleLanePerSampleDirFmt()
    out_dir = str(results)

    paired = demux.paired
    samples = demux.directory_format

    ids_to_keep = metadata.get_ids(where=where)
    if not ids_to_keep:
        raise ValueError('No filtering requested.')
    manifest = samples.manifest.view(pd.DataFrame)

    if exclude_ids:
        ids_to_keep = set(manifest.index) - set(ids_to_keep)

    for sample_id in ids_to_keep:
        # Only the manifest lookup is guarded, so a KeyError raised while
        # copying files is not misreported as a missing sample.
        try:
            record = manifest.loc[sample_id]
        except KeyError as err:
            raise ValueError(f'{sample_id!r} is not a sample present in the '
                             'demultiplexed data.') from err

        forward = record.forward
        duplicate(forward, os.path.join(out_dir, os.path.basename(forward)))
        if paired:
            reverse = record.reverse
            duplicate(reverse,
                      os.path.join(out_dir, os.path.basename(reverse)))

    return results
......@@ -25,7 +25,7 @@ TEMPLATES = pkg_resources.resource_filename('q2_demux', '_summarize')
def _decode_qual_to_phred33(qual_str):
# this function is adapted from scikit-bio
qual = np.fromstring(qual_str, dtype=np.uint8) - 33
qual = np.frombuffer(qual_str.encode('ascii'), dtype=np.uint8) - 33
return qual
......
......@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = " (tag: 2019.4.1)"
git_full = "7d08f111ea34d99d7eb77a3583eaf839ecfde05b"
git_date = "2019-05-08 16:42:34 -0700"
git_refnames = " (tag: 2019.10.0)"
git_full = "aec76a7d22d7126b57b99da259d752e72e99f148"
git_date = "2019-11-01 01:04:23 +0000"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
......
......@@ -9,8 +9,10 @@
import importlib
from qiime2.plugin import (
Plugin, MetadataColumn, Categorical, Bool, Int, Float, Range, Citations
Plugin, Metadata, MetadataColumn, Categorical, Bool, Str, Int, Float,
Range, Citations, TypeMatch
)
from q2_types.sample_data import SampleData
from q2_types.per_sample_sequences import (
SequencesWithQuality, PairedEndSequencesWithQuality,
......@@ -228,4 +230,43 @@ plugin.methods.register_function(
'sequences after subsampling.')
)
# T is used for both the `demux` input and the `filtered_demux` output
# below, tying the output's sequence type to the input's.
T = TypeMatch([SequencesWithQuality, PairedEndSequencesWithQuality,
JoinedSequencesWithQuality])
# Register `q2_demux.filter_samples` as a method of this plugin.
plugin.methods.register_function(
function=q2_demux.filter_samples,
inputs={'demux': SampleData[T]},
# These names mirror the keyword arguments of `filter_samples`.
parameters={'metadata': Metadata,
'where': Str,
'exclude_ids': Bool},
outputs=[
('filtered_demux', SampleData[T])
],
input_descriptions={
'demux': 'The demultiplexed data from which samples should be '
'filtered.'
},
# NOTE(review): the `where` text says ids absent from the demultiplexed data
# are simply not retained, but `filter_samples` raises ValueError for an id
# that is missing from the manifest -- confirm which behavior is intended.
parameter_descriptions={
'metadata': 'Sample metadata indicating which sample ids to filter. '
'The optional `where` parameter may be used to filter ids '
'based on specified conditions in the metadata. The '
'optional `exclude_ids` parameter may be used to exclude '
'the ids specified in the metadata from the filter.',
'where': 'Optional SQLite WHERE clause specifying sample metadata '
'criteria that must be met to be included in the filtered '
'data. If not provided, all samples in `metadata` that are '
'also in the demultiplexed data will be retained.',
'exclude_ids': 'Defaults to False. If True, the samples selected by '
'the `metadata` and optional `where` parameter will be '
'excluded from the filtered data.',
},
output_descriptions={
'filtered_demux': 'Filtered demultiplexed data.'
},
# User-facing name and description of the registered method.
name='Filter samples out of demultiplexed data.',
description='Filter samples indicated in given metadata out of '
'demultiplexed data. Specific samples can be further selected '
'with the WHERE clause, and the `exclude_ids` parameter '
'allows for filtering of all samples not specified.',
)
importlib.import_module('q2_demux._transformer')
sample-id,filename,direction
sample1,sample1_1_L001_R1_001.fastq.gz,forward
sample1,sample1_1_L001_R2_001.fastq.gz,reverse
sample2,sample2_2_L001_R1_001.fastq.gz,forward
sample2,sample2_2_L001_R2_001.fastq.gz,reverse
sample-id,filename,direction
# direction is not meaningful in this file as these
# data may be derived from forward, reverse, or
# joined reads
sample1,sample1_1_L001_R1_001.fastq.gz,forward
sample2,sample2_2_L001_R1_001.fastq.gz,forward