Skip to content
Commits on Source (6)
# Modified from https://github.com/biocore/scikit-bio/
# Modified from https://github.com/biocore/scikit-bio
language: python
env:
- PYTHON_VERSION=2.7 WITH_DOCTEST=False USE_CYTHON=True
- PYTHON_VERSION=3.5 WITH_DOCTEST=True USE_CYTHON=True
- PYTHON_VERSION=3.6 WITH_DOCTEST=True USE_CYTHON=True
- PYTHON_VERSION=3.7 WITH_DOCTEST=True USE_CYTHON=True
- PYTHON_VERSION=3.6 WITH_DOCTEST=True
- PYTHON_VERSION=3.7 WITH_DOCTEST=True
- PYTHON_VERSION=3.8 WITH_DOCTEST=True
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
- export PATH=/home/travis/miniconda3/bin:$PATH
install:
# Version constraints are quoted so the shell passes them to conda verbatim;
# an unquoted `h5py>=2.2.0` is parsed as `h5py` plus a redirect to `=2.2.0`.
- conda create --yes -n env_name python=$PYTHON_VERSION pip click numpy scipy pep8 flake8 coverage future six "pandas>=0.20.0" nose "h5py>=2.2.0" cython
- conda create --yes -n env_name python=$PYTHON_VERSION pip click numpy "scipy>=1.3.1" pep8 flake8 coverage future six "pandas>=0.20.0" nose "h5py>=2.2.0" cython
- rm biom/*.c
- source activate env_name
- if [ ${PYTHON_VERSION} = "2.7" ]; then pip install pyqi; fi
- if [ ${PYTHON_VERSION} = "2.7" ]; then conda install --yes Sphinx=1.2.2; fi
- if [ ${PYTHON_VERSION} = "3.6" ]; then pip install sphinx==1.2.2; fi
- pip install coveralls
- pip install -e . --no-deps
script:
- make test
- biom show-install-info
- if [ ${PYTHON_VERSION} = "2.7" ]; then make -C doc html; fi
- if [ ${PYTHON_VERSION} = "3.6" ]; then make -C doc html; fi
# we can only validate the tables if we have H5PY
- for table in examples/*hdf5.biom; do echo ${table}; biom validate-table -i ${table}; done
# validate JSON formatted tables
......
BIOM-Format ChangeLog
=====================
biom 2.1.8
----------
New features and bug fixes, released on 6 January 2020.
Important:
* Python 2.7 and 3.5 support has been dropped.
* Python 3.8 has been added to the Travis CI test matrix.
* The default of the `binary` argument of `Table.nonzero_counts` has changed to `True`, so the method now counts the number of nonzero entries by default instead of summing their values. See [issue #685](https://github.com/biocore/biom-format/issues/685).
* SciPy >= 1.3.1 is now required. See [issue #816](https://github.com/biocore/biom-format/issues/816).
New Features:
* The detailed report is no longer part of the table validator. See [issue #378](https://github.com/biocore/biom-format/issues/378).
* `load_table` now accepts open file handles. See [issue #481](https://github.com/biocore/biom-format/issues/481).
* `biom export-metadata` has been added to export metadata as TSV. See [issue #820](https://github.com/biocore/biom-format/issues/820).
Bug fixes:
* `Table.to_dataframe(dense=False)` now correctly produces sparse data frames (instead of accidentally dense ones, as before). See [issue #808](https://github.com/biocore/biom-format/issues/808).
* Order of error evaluations was unstable in Python versions without implicit `OrderedDict`. See [issue #813](https://github.com/biocore/biom-format/issues/813). Thanks @gwarmstrong for identifying this bug.
* `Table._extract_data_from_tsv` would fail if taxonomy was provided and the first row had an empty string for its taxonomy. See [issue #827](https://github.com/biocore/biom-format/issues/827). Thanks @KasperSkytte for identifying this bug.
biom 2.1.7
----------
......
......@@ -30,6 +30,7 @@ def cli(ctx):
import_module('biom.cli.table_summarizer')
import_module('biom.cli.metadata_adder')
import_module('biom.cli.metadata_exporter')
import_module('biom.cli.table_converter')
import_module('biom.cli.installation_informer')
import_module('biom.cli.table_subsetter')
......
# -----------------------------------------------------------------------------
# Copyright (c) 2011-2017, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------
import click
from biom import load_table
from biom.cli import cli
@cli.command(name='export-metadata')
@click.option('-i', '--input-fp', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM table')
@click.option('-m', '--sample-metadata-fp', required=False,
              type=click.Path(exists=False, dir_okay=False),
              help='The sample metadata output file.')
@click.option('--observation-metadata-fp', required=False,
              type=click.Path(exists=False, dir_okay=False),
              help='The observation metadata output file.')
def export_metadata(input_fp, sample_metadata_fp, observation_metadata_fp):
    """Export metadata as TSV.
    Example usage:
    Export metadata as TSV:
    $ biom export-metadata -i otu_table.biom
    --sample-metadata-fp sample.tsv
    --observation-metadata-fp observation.tsv
    """
    table = load_table(input_fp)

    # Each axis is exported only when its output path was supplied; the
    # sample axis is handled before the observation axis.
    requested = (('sample', sample_metadata_fp),
                 ('observation', observation_metadata_fp))
    for axis, output_fp in requested:
        if output_fp:
            _export_metadata(table, axis, input_fp, output_fp)
def _export_metadata(table, axis, input_fp, output_fp):
try:
metadata = table.metadata_to_dataframe(axis)
metadata.to_csv(output_fp, sep='\t')
except KeyError:
click.echo('File {} does not contain {} metadata'.format(input_fp,
axis))
......@@ -29,9 +29,7 @@ from biom.util import HAVE_H5PY, biom_open, is_hdf5_file
' specification')
@click.option('-f', '--format-version', default=None,
help='The specific format version to validate against')
@click.option('--detailed-report', is_flag=True, default=False,
help='Include more details in the output report')
def validate_table(input_fp, format_version, detailed_report):
def validate_table(input_fp, format_version):
"""Validate a BIOM-formatted file.
Test a file for adherence to the Biological Observation Matrix (BIOM)
......@@ -46,7 +44,7 @@ def validate_table(input_fp, format_version, detailed_report):
$ biom validate-table -i table.biom
"""
valid, report = _validate_table(input_fp, format_version, detailed_report)
valid, report = _validate_table(input_fp, format_version)
click.echo("\n".join(report))
if valid:
# apparently silence is too quiet to be golden.
......@@ -57,9 +55,8 @@ def validate_table(input_fp, format_version, detailed_report):
sys.exit(1)
def _validate_table(input_fp, format_version=None, detailed_report=False):
result = TableValidator()(table=input_fp, format_version=format_version,
detailed_report=detailed_report)
def _validate_table(input_fp, format_version=None):
result = TableValidator()(table=input_fp, format_version=format_version)
return result['valid_table'], result['report_lines']
......@@ -108,23 +105,15 @@ class TableValidator(object):
raise IOError("h5py is not installed, can only validate JSON "
"tables")
def __call__(self, table, format_version=None, detailed_report=False):
return self.run(table=table, format_version=format_version,
detailed_report=detailed_report)
def __call__(self, table, format_version=None):
return self.run(table=table, format_version=format_version)
def _validate_hdf5(self, **kwargs):
table = kwargs['table']
# Need to make this an attribute so that we have this info during
# validation.
detailed_report = kwargs['detailed_report']
report_lines = []
valid_table = True
if detailed_report:
report_lines.append("Validating BIOM table...")
required_attrs = [
('format-url', self._valid_format_url),
('format-version', self._valid_hdf5_format_version),
......@@ -154,9 +143,6 @@ class TableValidator(object):
report_lines.append("Missing attribute: '%s'" % required_attr)
continue
if detailed_report:
report_lines.append("Validating '%s'..." % required_attr)
status_msg = attr_validator(table)
if len(status_msg) > 0:
......@@ -166,20 +152,12 @@ class TableValidator(object):
for group in required_groups:
if group not in table:
valid_table = False
if detailed_report:
report_lines.append("Missing group: %s" % group)
for dataset in required_datasets:
if dataset not in table:
valid_table = False
if detailed_report:
report_lines.append("Missing dataset: %s" % dataset)
if 'shape' in table.attrs:
if detailed_report:
report_lines.append("Validating 'shape' versus number of "
"samples and observations...")
n_obs, n_samp = table.attrs['shape']
obs_ids = table.get('observation/ids', None)
samp_ids = table.get('sample/ids', None)
......@@ -270,14 +248,10 @@ class TableValidator(object):
# Need to make this an attribute so that we have this info during
# validation.
self._format_version = kwargs['format_version']
detailed_report = kwargs['detailed_report']
report_lines = []
valid_table = True
if detailed_report:
report_lines.append("Validating BIOM table...")
required_keys = [
('format', self._valid_format),
('format_url', self._valid_format_url),
......@@ -299,9 +273,6 @@ class TableValidator(object):
report_lines.append("Missing field: '%s'" % key)
continue
if detailed_report:
report_lines.append("Validating '%s'..." % key)
status_msg = method(table_json)
if len(status_msg) > 0:
......@@ -309,10 +280,6 @@ class TableValidator(object):
report_lines.append(status_msg)
if 'shape' in table_json:
if detailed_report:
report_lines.append("Validating 'shape' versus number of rows "
"and columns...")
if ('rows' in table_json and
len(table_json['rows']) != table_json['shape'][0]):
valid_table = False
......
......@@ -75,7 +75,8 @@ OBSMDSIZE = "Size of observation metadata differs from matrix size!"
SAMPMDSIZE = "Size of sample metadata differs from matrix size!"
def _test_empty(t):
# _zz_ so the sort order places this test last
def _zz_test_empty(t):
"""Check if t is empty"""
return t.is_empty()
......@@ -250,8 +251,9 @@ class ErrorProfile(object):
if not args:
args = self._test.keys()
for errtype in args:
for errtype in sorted(args):
test = self._test.get(errtype, lambda: None)
if test(item):
return self._handle_error(errtype, item)
......@@ -318,7 +320,7 @@ class ErrorProfile(object):
__errprof = ErrorProfile()
__errprof.register('empty', EMPTY, 'ignore', _test_empty,
__errprof.register('empty', EMPTY, 'ignore', _zz_test_empty,
exception=TableException)
__errprof.register('obssize', OBSSIZE, 'raise', _test_obssize,
exception=TableException)
......
......@@ -12,6 +12,8 @@ from __future__ import division
import numpy as np
from future.utils import string_types
import io
import h5py
from biom.exception import BiomParseException, UnknownAxisError
from biom.table import Table
......@@ -341,13 +343,14 @@ def parse_uc(fh):
return Table(data, observation_ids=observation_ids, sample_ids=sample_ids)
def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
r"""Parses the biom table stored in the filepath `fp`
def parse_biom_table(file_obj, ids=None, axis='sample', input_is_dense=False):
r"""Parses the biom table stored in `file_obj`
Parameters
----------
fp : file like
File alike object storing the BIOM table
file_obj : file-like object, or list
file-like object storing the BIOM table (tab-delimited or JSON), or
a list of lines of the BIOM table in tab-delimited or JSON format
ids : iterable
The sample/observation ids of the samples/observations that we need
to retrieve from the biom table
......@@ -360,7 +363,7 @@ def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
Returns
-------
Table
The BIOM table stored at fp
The BIOM table stored at file_obj
Raises
------
......@@ -391,34 +394,36 @@ def parse_biom_table(fp, ids=None, axis='sample', input_is_dense=False):
UnknownAxisError(axis)
try:
return Table.from_hdf5(fp, ids=ids, axis=axis)
return Table.from_hdf5(file_obj, ids=ids, axis=axis)
except ValueError:
pass
except RuntimeError:
pass
if hasattr(fp, 'read'):
old_pos = fp.tell()
if hasattr(file_obj, 'read'):
old_pos = file_obj.tell()
# Read in characters until first non-whitespace
# If it is a {, then this is (most likely) JSON
c = fp.read(1)
c = file_obj.read(1)
while c.isspace():
c = fp.read(1)
c = file_obj.read(1)
if c == '{':
fp.seek(old_pos)
t = Table.from_json(json.load(fp, object_pairs_hook=OrderedDict),
file_obj.seek(old_pos)
t = Table.from_json(json.load(file_obj,
object_pairs_hook=OrderedDict),
input_is_dense=input_is_dense)
else:
fp.seek(old_pos)
t = Table.from_tsv(fp, None, None, lambda x: x)
elif isinstance(fp, list):
file_obj.seek(old_pos)
t = Table.from_tsv(file_obj, None, None, lambda x: x)
elif isinstance(file_obj, list):
try:
t = Table.from_json(json.loads(''.join(fp),
t = Table.from_json(json.loads(''.join(file_obj),
object_pairs_hook=OrderedDict),
input_is_dense=input_is_dense)
except ValueError:
t = Table.from_tsv(fp, None, None, lambda x: x)
t = Table.from_tsv(file_obj, None, None, lambda x: x)
else:
t = Table.from_json(json.loads(fp, object_pairs_hook=OrderedDict),
t = Table.from_json(json.loads(file_obj,
object_pairs_hook=OrderedDict),
input_is_dense=input_is_dense)
def subset_ids(data, id_, md):
......@@ -632,7 +637,8 @@ def load_table(f):
Parameters
----------
f : str
f : str or file-like object
The entity to parse
Returns
-------
......@@ -655,6 +661,12 @@ def load_table(f):
>>> table = load_table('path/to/table.biom') # doctest: +SKIP
"""
if isinstance(f, (io.IOBase, h5py.File)):
try:
table = parse_biom_table(f)
except (IndexError, TypeError):
raise TypeError("%s does not appear to be a BIOM file!" % f)
else:
with biom_open(f) as fp:
try:
table = parse_biom_table(fp)
......
......@@ -178,7 +178,7 @@ import scipy.stats
from copy import deepcopy
from datetime import datetime
from json import dumps
from functools import reduce
from functools import reduce, partial
from operator import itemgetter
from future.builtins import zip
from future.utils import viewitems
......@@ -2822,7 +2822,7 @@ class Table(object):
Parameters
----------
inplace : bool, optional
Defaults to ``False``
Defaults to ``True``
Returns
-------
......@@ -3103,7 +3103,7 @@ class Table(object):
for col_idx in indices[start:end]:
yield (obs_id, samp_ids[col_idx])
def nonzero_counts(self, axis, binary=False):
def nonzero_counts(self, axis, binary=True):
"""Get nonzero summaries about an axis
Parameters
......@@ -3111,7 +3111,7 @@ class Table(object):
axis : {'sample', 'observation', 'whole'}
The axis on which to count nonzero entries
binary : bool, optional
Defaults to ``False``. If ``True``, return number of nonzero
Defaults to ``True``. If ``True``, return number of nonzero
entries. If ``False``, sum the values of the entries.
Returns
......@@ -3252,26 +3252,26 @@ class Table(object):
alignable_o = self_o == other_o
alignable_s = self_s == other_s
if axis is 'both' and not (alignable_o and alignable_s):
if axis == 'both' and not (alignable_o and alignable_s):
raise DisjointIDError("Cannot align both axes")
elif axis is 'sample' and not alignable_s:
elif axis == 'sample' and not alignable_s:
raise DisjointIDError("Cannot align samples")
elif axis is 'observation' and not alignable_o:
elif axis == 'observation' and not alignable_o:
raise DisjointIDError("Cannot align observations")
elif axis is 'detect' and not (alignable_o or alignable_s):
elif axis == 'detect' and not (alignable_o or alignable_s):
raise DisjointIDError("Neither axis appears alignable")
if axis is 'both':
if axis == 'both':
order = ['observation', 'sample']
elif axis is 'detect':
elif axis == 'detect':
order = []
if alignable_s:
order.append('sample')
if alignable_o:
order.append('observation')
elif axis is 'sample':
elif axis == 'sample':
order = ['sample']
elif axis is 'observation':
elif axis == 'observation':
order = ['observation']
else:
raise UnknownAxisError("Unrecognized axis: %s" % axis)
......@@ -3506,18 +3506,18 @@ class Table(object):
"""
# determine the sample order in the resulting table
if sample is 'union':
if sample == 'union':
new_samp_order = self._union_id_order(self.ids(), other.ids())
elif sample is 'intersection':
elif sample == 'intersection':
new_samp_order = self._intersect_id_order(self.ids(), other.ids())
else:
raise TableException("Unknown sample merge type: %s" % sample)
# determine the observation order in the resulting table
if observation is 'union':
if observation == 'union':
new_obs_order = self._union_id_order(
self.ids(axis='observation'), other.ids(axis='observation'))
elif observation is 'intersection':
elif observation == 'intersection':
new_obs_order = self._intersect_id_order(
self.ids(axis='observation'), other.ids(axis='observation'))
else:
......@@ -4045,9 +4045,10 @@ html
mat = self.matrix_data.toarray()
constructor = pd.DataFrame
else:
mat = [pd.SparseSeries(r.toarray().squeeze())
for r in self.matrix_data.tocsr()]
constructor = pd.SparseDataFrame
mat = self.matrix_data
constructor = partial(pd.SparseDataFrame,
default_fill_value=0,
copy=True)
return constructor(mat, index=index, columns=columns)
......@@ -4688,6 +4689,14 @@ html
.. shownumpydoc
"""
def isfloat(value):
    """Return True when ``float(value)`` succeeds, False on ValueError."""
    # see https://stackoverflow.com/a/20929881
    try:
        float(value)
    except ValueError:
        return False
    else:
        return True
if not isinstance(lines, list):
try:
hasattr(lines, 'seek')
......@@ -4706,37 +4715,28 @@ html
# Covers the case where the first line is the header
# and there is no indication of it (no comment character)
if not header:
header = line.strip().split(delim)[1:]
header = line.rstrip().split(delim)[1:]
data_start = list_index + 1
else:
data_start = list_index
break
list_index += 1
header = line.strip().split(delim)[1:]
# If the first line is the header, then we need to get the next
# If the first line is the header, then we need to get the data lines
# line for the "last column" check
if isinstance(lines, list):
line = lines[data_start]
value_checks = lines[data_start:]
else:
lines.seek(0)
for index in range(0, data_start + 1):
line = lines.readline()
for index in range(0, data_start):
lines.readline()
value_checks = [line for line in lines]
# attempt to determine if the last column is non-numeric, ie, metadata
first_values = line.strip().split(delim)
last_value = first_values[-1]
last_column_is_numeric = True
if '.' in last_value:
try:
float(last_value)
except ValueError:
last_column_is_numeric = False
else:
try:
int(last_value)
except ValueError:
last_column_is_numeric = False
last_values = [line.rsplit(delim, 1)[-1].strip()
for line in value_checks]
last_column_is_numeric = all([isfloat(i) for i in last_values])
# determine sample ids
if last_column_is_numeric:
......@@ -4761,13 +4761,13 @@ html
lines = lines[data_start:]
for lineno, line in enumerate(lines, data_start):
line = line.strip()
if not line:
if not line.strip():
continue
if line.startswith('#'):
continue
fields = line.strip().split(delim)
fields = line.split(delim)
fields[-1] = fields[-1].strip()
obs_ids.append(fields[0])
if last_column_is_numeric:
......
......@@ -121,9 +121,8 @@ class TableValidatorTests(TestCase):
f.close()
self.to_remove.append('valid_test3')
obs = self.cmd(table='valid_test3', detailed_report=True)
obs = self.cmd(table='valid_test3')
self.assertTrue(obs['valid_table'])
self.assertTrue(len(obs['report_lines']) > 0)
def test_invalid(self):
"""Correctly invalidates a table that is... invalid."""
......
......@@ -11,12 +11,14 @@
from unittest import TestCase, main
from copy import deepcopy
import numpy as np
from biom import example_table, Table
from biom.exception import TableException
from biom.err import (_test_empty, _test_obssize, _test_sampsize, _test_obsdup,
_test_sampdup, _test_obsmdsize, _test_sampmdsize,
errstate, geterr, seterr, geterrcall, seterrcall,
errcheck, __errprof)
from biom.err import (_zz_test_empty, _test_obssize, _test_sampsize,
_test_obsdup, _test_sampdup, _test_obsmdsize,
_test_sampmdsize, errstate, geterr, seterr, geterrcall,
seterrcall, errcheck, __errprof)
runtime_ep = __errprof
......@@ -30,8 +32,8 @@ class ErrModeTests(TestCase):
self.ex_table = example_table.copy()
def test_test_empty(self):
self.assertTrue(_test_empty(Table([], [], [])))
self.assertFalse(_test_empty(self.ex_table))
self.assertTrue(_zz_test_empty(Table([], [], [])))
self.assertFalse(_zz_test_empty(self.ex_table))
def test_test_obssize(self):
self.assertFalse(_test_obssize(self.ex_table))
......@@ -87,6 +89,17 @@ class ErrorProfileTests(TestCase):
self.assertTrue(isinstance(self.ep.test(self.ex_table, 'obssize'),
TableException))
def test_test_evaluation_order(self):
    """The reported error must not depend on the order of requested checks."""
    # issue 813
    tab = Table(np.array([[1, 2], [3, 4]]), ['A', 'B'], ['C', 'D'])
    # Overwrite the ids directly so both axes contain duplicates.
    tab._observation_ids = np.array(['A', 'A'], dtype='object')
    tab._sample_ids = np.array(['B', 'B'], dtype='object')

    # Both argument orders are expected to report the observation error.
    for errtypes in (('obsdup', 'sampdup'), ('sampdup', 'obsdup')):
        result = self.ep.test(tab, *errtypes)
        self.assertEqual(result.args[0], 'Duplicate observation IDs')
def test_state(self):
self.ep.state = {'all': 'ignore'}
self.assertEqual(set(self.ep._state.values()), set(['ignore']))
......
......@@ -16,7 +16,8 @@ from unittest import TestCase, main
import numpy as np
import numpy.testing as npt
from biom.parse import generatedby, MetadataMap, parse_biom_table, parse_uc
from biom.parse import (generatedby, MetadataMap, parse_biom_table, parse_uc,
load_table)
from biom.table import Table
from biom.util import HAVE_H5PY, __version__
from biom.tests.long_lines import (uc_empty, uc_invalid_id, uc_minimal,
......@@ -237,6 +238,32 @@ class ParseTests(TestCase):
Table.from_hdf5(h5py.File('test_data/test.biom'))
os.chdir(cwd)
@npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
def test_load_table_filepath(self):
    """load_table accepts a filesystem path."""
    cwd = os.getcwd()
    # The fixture path is relative to this test module's directory.
    if '/' in __file__[1:]:
        os.chdir(__file__.rsplit('/', 1)[0])
    try:
        load_table('test_data/test.biom')
    finally:
        # Restore the working directory even when loading fails, so a
        # failure here cannot cascade into unrelated tests.
        os.chdir(cwd)
@npt.dec.skipif(HAVE_H5PY is False, msg='H5PY is not installed')
def test_load_table_inmemory(self):
    """load_table accepts an already-open h5py.File object."""
    cwd = os.getcwd()
    # The fixture path is relative to this test module's directory.
    if '/' in __file__[1:]:
        os.chdir(__file__.rsplit('/', 1)[0])
    try:
        # The context manager closes the HDF5 handle once the table has
        # been loaded, instead of leaking it for the rest of the run.
        with h5py.File('test_data/test.biom') as fh:
            load_table(fh)
    finally:
        # Restore the working directory even when loading fails.
        os.chdir(cwd)
def test_load_table_inmemory_json(self):
    """load_table accepts an open text file handle."""
    cwd = os.getcwd()
    # The fixture path is relative to this test module's directory.
    if '/' in __file__[1:]:
        os.chdir(__file__.rsplit('/', 1)[0])
    try:
        # Close the handle deterministically rather than relying on
        # garbage collection (avoids ResourceWarning noise).
        with open('test_data/test.json') as fh:
            load_table(fh)
    finally:
        # Restore the working directory even when loading fails.
        os.chdir(cwd)
def test_load_table_inmemory_stringio(self):
    """load_table accepts a StringIO built from the fixture's lines."""
    tsv_text = '\n'.join(self.classic_otu_table1_no_tax)
    buffer = StringIO(tsv_text)
    load_table(buffer)
def test_parse_biom_table(self):
"""tests for parse_biom_table when we do not have h5py"""
# This is a TSV as a list of lines
......
......@@ -1475,10 +1475,17 @@ class TableTests(TestCase):
def test_to_dataframe(self):
exp = pd.SparseDataFrame(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]),
index=['O1', 'O2'],
columns=['S1', 'S2', 'S3'])
columns=['S1', 'S2', 'S3'],
default_fill_value=0.0)
obs = example_table.to_dataframe()
pdt.assert_frame_equal(obs, exp)
def test_to_dataframe_is_sparse(self):
    """The default to_dataframe() result has the matrix's own density."""
    df = example_table.to_dataframe()
    # Fraction of explicitly stored entries in the underlying matrix.
    density = (float(example_table.matrix_data.getnnz()) /
               np.prod(example_table.shape))
    # assertTrue instead of a bare assert: the check survives `python -O`
    # and is reported through unittest's normal failure machinery.
    self.assertTrue(np.allclose(df.density, density))
def test_to_dataframe_dense(self):
exp = pd.DataFrame(np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]),
index=['O1', 'O2'],
......@@ -2228,9 +2235,9 @@ class SparseTableTests(TestCase):
exp_obs = np.array([14, 15, 0])
exp_whole = np.array([29])
obs_samp = st.nonzero_counts('sample')
obs_obs = st.nonzero_counts('observation')
obs_whole = st.nonzero_counts('whole')
obs_samp = st.nonzero_counts('sample', binary=False)
obs_obs = st.nonzero_counts('observation', binary=False)
obs_whole = st.nonzero_counts('whole', binary=False)
npt.assert_equal(obs_samp, exp_samp)
npt.assert_equal(obs_obs, exp_obs)
......@@ -3771,6 +3778,47 @@ class SparseTableTests(TestCase):
obs = Table._extract_data_from_tsv(input, dtype=int)
npt.assert_equal(obs, exp)
def test_extract_data_from_tsv_bad_metadata(self):
# Checks Table._extract_data_from_tsv on two fixtures whose first data row
# has an empty string as its metadata value (the '' entries below).
# The expected result is the tuple
# (sample ids, observation ids, data, metadata, metadata column name).
input = legacy_otu_table_bad_metadata.splitlines()
samp_ids = ['Fing', 'Key', 'NA']
obs_ids = ['0', '1', '7', '3', '4']
metadata = [
'',
'Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; Lactobacillal'
'es; Lactobacillales; Streptococcaceae; Streptococcus',
'Bacteria; Actinobacteria; Actinobacteridae; Gordoniaceae; Coryneb'
'acteriaceae',
'Bacteria; Firmicutes; Alicyclobacillaceae; Bacilli; Staphylococca'
'ceae',
'Bacteria; Cyanobacteria; Chloroplasts; vectors']
md_name = 'Consensus Lineage'
# NOTE(review): entries look like [observation index, sample index, value]
# triplets -- confirm against _extract_data_from_tsv.
data = [[0, 0, 19111], [0, 1, 44536], [0, 2, 42],
[1, 0, 1216], [1, 1, 3500], [1, 2, 6],
[2, 0, 1803], [2, 1, 1184], [2, 2, 2],
[3, 0, 1722], [3, 1, 4903], [3, 2, 17],
[4, 0, 589], [4, 1, 2074], [4, 2, 34]]
exp = (samp_ids, obs_ids, data, metadata, md_name)
obs = Table._extract_data_from_tsv(input, dtype=int)
npt.assert_equal(obs, exp)
# and assert the exact identified bug in #827 is resolved
input = extract_tsv_bug.splitlines()
samp_ids = ['s1', 's2']
obs_ids = ['1', '2', '3']
metadata = [
'',
'k__test;p__test',
'k__test;p__test']
md_name = 'taxonomy'
# There is no [2, 0, ...] entry: presumably zero counts are not emitted
# (the fixture row for observation '3' has 0 for s1) -- TODO confirm.
data = [[0, 0, 123], [0, 1, 32],
[1, 0, 315], [1, 1, 3],
[2, 1, 22]]
exp = (samp_ids, obs_ids, data, metadata, md_name)
obs = Table._extract_data_from_tsv(input, dtype=int)
npt.assert_equal(obs, exp)
def test_identify_bad_value(self):
pos = [str(i) for i in range(10)]
exp = (None, None)
......@@ -4116,6 +4164,21 @@ ae; Corynebacteriaceae
aphylococcaceae
4\t589\t2074\t34\tBacteria; Cyanobacteria; Chloroplasts; vectors
"""
legacy_otu_table_bad_metadata = u"""# some comment goes here
#OTU id\tFing\tKey\tNA\tConsensus Lineage
0\t19111\t44536\t42 \t
1\t1216\t3500\t6\tBacteria; Firmicutes; Alicyclobacillaceae; Bacilli; La\
ctobacillales; Lactobacillales; Streptococcaceae; Streptococcus
7\t1803\t1184\t2\tBacteria; Actinobacteria; Actinobacteridae; Gordoniace\
ae; Corynebacteriaceae
3\t1722\t4903\t17\tBacteria; Firmicutes; Alicyclobacillaceae; Bacilli; St\
aphylococcaceae
4\t589\t2074\t34\tBacteria; Cyanobacteria; Chloroplasts; vectors
"""
extract_tsv_bug = """#OTU ID s1 s2 taxonomy
1 123 32\t
2 315 3 k__test;p__test
3 0 22 k__test;p__test"""
otu_table1 = u"""# Some comment
#OTU ID\tFing\tKey\tNA\tConsensus Lineage
0\t19111\t44536\t42\tBacteria; Actinobacteria; Actinobacteridae; \
......
......@@ -9,7 +9,6 @@
# ----------------------------------------------------------------------------
import os
import sys
import inspect
from contextlib import contextmanager
import io
......@@ -27,10 +26,6 @@ try:
import h5py
HAVE_H5PY = True
if sys.version_info.major == 2:
H5PY_VLEN_STR = h5py.special_dtype(vlen=unicode) # noqa
H5PY_VLEN_UNICODE = h5py.special_dtype(vlen=unicode) # noqa
else:
H5PY_VLEN_STR = h5py.special_dtype(vlen=str)
H5PY_VLEN_UNICODE = h5py.special_dtype(vlen=str)
......@@ -50,7 +45,7 @@ __url__ = "http://biom-format.org"
__maintainer__ = "Daniel McDonald"
__email__ = "daniel.mcdonald@colorado.edu"
__format_version__ = (2, 1)
__version__ = "2.1.7"
__version__ = "2.1.8"
def generate_subsamples(table, n, axis='sample', by_id=False):
......@@ -390,7 +385,8 @@ def is_gzip(fp):
project, but we obtained permission from the authors of this function to
port it to the BIOM Format project (and keep it under BIOM's BSD license).
"""
return open(fp, 'rb').read(2) == b'\x1f\x8b'
with open(fp, 'rb') as f:
return f.read(2) == b'\x1f\x8b'
@contextmanager
......
python-biom-format (2.1.8+dfsg-1) unstable; urgency=medium
* Drop cython from Build-Depends
Closes: #937605
* New upstream version
* Set upstream metadata fields: Bug-Submit.
-- Andreas Tille <tille@debian.org> Mon, 20 Jan 2020 11:35:04 +0100
python-biom-format (2.1.7+dfsg-5) unstable; urgency=medium
* Set upstream metadata fields: Bug-Database, Repository, Repository-
......
......@@ -6,7 +6,6 @@ Testsuite: autopkgtest-pkg-python
Priority: optional
Build-Depends: debhelper-compat (= 12),
dh-python,
cython,
help2man,
bash-completion,
cython3,
......
Bug-Submit: https://github.com/biocore/biom-format/issues/new
Reference:
Author: >
Daniel McDonald and Jose C. Clemente and Justin Kuczynski and Jai
......
......@@ -66,8 +66,8 @@ copyright = u'2011-2018 The BIOM Format Development Team'
# built documents.
#
# The full version, including alpha/beta/rc tags.
version = "2.1.7"
release = "2.1.7"
version = "2.1.8"
release = "2.1.8"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......
......@@ -9,7 +9,6 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import os
import sys
from setuptools import setup, find_packages
......@@ -21,6 +20,13 @@ try:
except ImportError:
raise ImportError("numpy must be installed prior to installing biom")
try:
from Cython.Build import cythonize
except ImportError:
raise ImportError("cython must be installed prior to installing biom")
# Hack to prevent stupid "TypeError: 'NoneType' object is not callable" error
# in multiprocessing/util.py _exit_function when running `python
# setup.py test` (see
......@@ -37,7 +43,7 @@ __copyright__ = "Copyright 2011-2017, The BIOM Format Development Team"
__credits__ = ["Greg Caporaso", "Daniel McDonald", "Jose Clemente",
"Jai Ram Rideout", "Jorge Cañardo Alastuey", "Michael Hall"]
__license__ = "BSD"
__version__ = "2.1.7"
__version__ = "2.1.8"
__maintainer__ = "Daniel McDonald"
__email__ = "mcdonadt@colorado.edu"
......@@ -92,10 +98,9 @@ classes = """
Topic :: Software Development :: Libraries :: Application Frameworks
Topic :: Software Development :: Libraries :: Python Modules
Programming Language :: Python
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3.4
Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: Implementation :: CPython
Operating System :: OS Independent
Operating System :: POSIX :: Linux
......@@ -104,8 +109,7 @@ classes = """
classifiers = [s.strip() for s in classes.split('\n') if s]
# Dealing with Cython
USE_CYTHON = os.environ.get('USE_CYTHON', False)
ext = '.pyx' if USE_CYTHON else '.c'
ext = '.pyx'
extensions = [Extension("biom._filter",
["biom/_filter" + ext],
include_dirs=[np.get_include()]),
......@@ -115,22 +119,15 @@ extensions = [Extension("biom._filter",
Extension("biom._subsample",
["biom/_subsample" + ext],
include_dirs=[np.get_include()])]
if USE_CYTHON:
from Cython.Build import cythonize
extensions = cythonize(extensions)
install_requires = ["click", "numpy >= 1.9.2", "future >= 0.16.0",
"scipy >= 0.13.0", 'pandas >= 0.20.0',
"six >= 1.10.0"]
"scipy >= 1.3.1", 'pandas >= 0.20.0',
"six >= 1.10.0", "cython >= 0.29"]
# HACK: for backward-compatibility with QIIME 1.9.x, pyqi must be installed.
# pyqi is not used anymore in this project.
if sys.version_info[0] < 3:
install_requires.append("pyqi")
import warnings
warnings.warn("Python 2.7 support will be removed on the next release",
DeprecationWarning)
raise SystemExit("Python 2.7 is no longer supported")
setup(name='biom-format',
version=__version__,
......