From 7a315a1a4637c5ef393e2f08b8c2025f7b6509aa Mon Sep 17 00:00:00 2001 From: Andrius Merkys <merkys@debian.org> Date: Wed, 27 Nov 2019 04:22:49 -0500 Subject: [PATCH] New upstream version 0.2.3 --- .travis.yml | 4 +++- AUTHORS.rst | 4 ++++ CHANGELOG.rst | 17 ++++++++++++++++ COPYING | 2 +- MANIFEST.in | 2 ++ README.rst | 4 ---- agateexcel/table_xls.py | 37 +++++++++++++++++++++++++--------- agateexcel/table_xlsx.py | 33 ++++++++++++++++++++---------- docs/conf.py | 8 +++++--- example.py | 2 +- requirements-py3.txt | 2 +- setup.py | 5 +++-- tests/test_table_xls.py | 43 +++++++++++++++++++++++++++++----------- tests/test_table_xlsx.py | 41 ++++++++++++++++++++++++++++---------- tox.ini | 12 +++++++---- 15 files changed, 157 insertions(+), 59 deletions(-) mode change 100644 => 100755 agateexcel/table_xlsx.py diff --git a/.travis.yml b/.travis.yml index b8823ad..da38eb7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,11 @@ +dist: xenial language: python python: - "2.7" - - "3.3" - "3.4" - "3.5" + - "3.6" + - "3.7" # command to install dependencies install: - if [[ $TRAVIS_PYTHON_VERSION == 3* ]]; then pip install -r requirements-py3.txt; else pip install -r requirements-py2.txt; fi diff --git a/AUTHORS.rst b/AUTHORS.rst index 058c201..337e959 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -4,3 +4,7 @@ The following individuals have contributed code to agate-excel: * `James McKinney <https://github.com/jpmckinney>`_ * `Ben Welsh <https://github.com/palewire>`_ * `Peter M. Landwehr <https://github.com/pmlandwehr>`_ +* `Tim Freund <https://github.com/timfreund>`_ +* `Jani Mikkonen <https://github.com/rasjani>`_ +* `Loïc Corbasson <https://github.com/lcorbasson>`_ +* `Robert Schütz <https://github.com/dotlambda>`_ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 424ae10..72f4df0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,20 @@ +0.2.3 +----- + +* Fix bug in accepting ``column_names`` as keyword argument. 
+* Add a ``reset_dimensions`` argument to :meth:`.Table.from_xlsx` to recalculate the data's dimensions, instead of trusting those in the file's properties. +* Include tests and examples in distribution. +* agate-excel is now tested against Python 3.6 and 3.7. +* Drop support for Python 3.3 (end-of-life was September 29, 2017). +* Add support for openpyxl 2.6.0. + +0.2.2 - January 28, 2018 +------------------------ + +* Add an ``encoding_override`` argument to :meth:`.Table.from_xls` to override the encoding of the input XLS file. +* Add a ``header`` argument to :meth:`.Table.from_xls` and :meth:`.Table.from_xlsx` to indicate the presence of a header row. +* Add a ``read_only`` argument to :meth:`.Table.from_xlsx` to allow disabling read-only mode for `some spreadsheets <https://openpyxl.readthedocs.io/en/stable/optimized.html#worksheet-dimensions>`_. + 0.2.1 - February 28, 2017 ------------------------- diff --git a/COPYING b/COPYING index 15aa61e..8f55739 100644 --- a/COPYING +++ b/COPYING @@ -1,6 +1,6 @@ The MIT License -Copyright (c) 2015 Christopher Groskopf and contributors +Copyright (c) 2017 Christopher Groskopf and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/MANIFEST.in b/MANIFEST.in index 45eee36..eaee0cc 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include COPYING include AUTHORS.rst include README.rst +recursive-include tests *.py +graft examples diff --git a/README.rst b/README.rst index 91b0427..baffd5e 100644 --- a/README.rst +++ b/README.rst @@ -2,10 +2,6 @@ :target: https://travis-ci.org/wireservice/agate-excel :alt: Build status -.. image:: https://img.shields.io/pypi/dw/agate-excel.svg - :target: https://pypi.python.org/pypi/agate-excel - :alt: PyPI downloads - .. 
image:: https://img.shields.io/pypi/v/agate-excel.svg :target: https://pypi.python.org/pypi/agate-excel :alt: Version diff --git a/agateexcel/table_xls.py b/agateexcel/table_xls.py index 15a0d49..26a5122 100644 --- a/agateexcel/table_xls.py +++ b/agateexcel/table_xls.py @@ -11,7 +11,8 @@ import agate import six import xlrd -def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs): + +def from_xls(cls, path, sheet=None, skip_lines=0, header=True, encoding_override=None, **kwargs): """ Parse an XLS file. @@ -22,15 +23,17 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs): then the first sheet will be used. :param skip_lines: The number of rows to skip from the top of the sheet. + :param header: + If :code:`True`, the first row is assumed to contain column names. """ if not isinstance(skip_lines, int): raise ValueError('skip_lines argument must be an int') if hasattr(path, 'read'): - book = xlrd.open_workbook(file_contents=path.read()) + book = xlrd.open_workbook(file_contents=path.read(), encoding_override=encoding_override) else: with open(path, 'rb') as f: - book = xlrd.open_workbook(file_contents=f.read()) + book = xlrd.open_workbook(file_contents=f.read(), encoding_override=encoding_override) multiple = agate.utils.issequence(sheet) if multiple: @@ -48,15 +51,19 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs): else: sheet = book.sheet_by_index(0) - column_names = [] + if header: + offset = 1 + column_names = [] + else: + offset = 0 + column_names = None + columns = [] for i in range(sheet.ncols): data = sheet.col_values(i) - name = six.text_type(data[skip_lines]) or None - values = data[skip_lines + 1:] - types = sheet.col_types(i)[skip_lines + 1:] - + values = data[skip_lines + offset:] + types = sheet.col_types(i)[skip_lines + offset:] excel_type = determine_excel_type(types) if excel_type == xlrd.biffh.XL_CELL_BOOLEAN: @@ -64,7 +71,10 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs): elif excel_type == 
xlrd.biffh.XL_CELL_DATE: values = normalize_dates(values, book.datemode) - column_names.append(name) + if header: + name = six.text_type(data[skip_lines]) or None + column_names.append(name) + columns.append(values) rows = [] @@ -73,6 +83,11 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs): for i in range(len(columns[0])): rows.append([c[i] for c in columns]) + if 'column_names' in kwargs: + if not header: + column_names = kwargs['column_names'] + del kwargs['column_names'] + tables[sheet.name] = agate.Table(rows, column_names, **kwargs) if multiple: @@ -80,6 +95,7 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs): else: return tables.popitem()[1] + def determine_excel_type(types): """ Determine the correct type for a column from a list of cell types. @@ -96,6 +112,7 @@ def determine_excel_type(types): except KeyError: return xlrd.biffh.XL_CELL_EMPTY + def normalize_booleans(values): normalized = [] @@ -107,6 +124,7 @@ def normalize_booleans(values): return normalized + def normalize_dates(values, datemode=0): """ Normalize a column of date cells. @@ -131,4 +149,5 @@ def normalize_dates(values, datemode=0): return normalized + agate.Table.from_xls = classmethod(from_xls) diff --git a/agateexcel/table_xlsx.py b/agateexcel/table_xlsx.py old mode 100644 new mode 100755 index f027d32..e3f3c1d --- a/agateexcel/table_xlsx.py +++ b/agateexcel/table_xlsx.py @@ -13,7 +13,9 @@ import six NULL_TIME = datetime.time(0, 0, 0) -def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs): + +def from_xlsx(cls, path, sheet=None, skip_lines=0, header=True, read_only=True, + reset_dimensions=False, **kwargs): """ Parse an XLSX file. @@ -24,6 +26,11 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs): then the "active" sheet will be used. :param skip_lines: The number of rows to skip from the top of the sheet. + :param header: + If :code:`True`, the first row is assumed to contain column names. 
+ :param reset_dimensions: + If :code:`True`, do not trust the dimensions in the file's properties, + and recalculate them based on the data in the file. """ if not isinstance(skip_lines, int): raise ValueError('skip_lines argument must be an int') @@ -33,7 +40,7 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs): else: f = open(path, 'rb') - book = openpyxl.load_workbook(f, read_only=True, data_only=True) + book = openpyxl.load_workbook(f, read_only=read_only, data_only=True) multiple = agate.utils.issequence(sheet) if multiple: @@ -51,11 +58,14 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs): else: sheet = book.active - column_names = [] + column_names = None rows = [] - for i, row in enumerate(sheet.iter_rows(row_offset=skip_lines)): - if i == 0: + if reset_dimensions: + sheet.reset_dimensions() + + for i, row in enumerate(sheet.iter_rows(min_row=skip_lines + 1)): + if i == 0 and header: column_names = [None if c.value is None else six.text_type(c.value) for c in row] continue @@ -79,6 +89,11 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs): rows.append(values) + if 'column_names' in kwargs: + if not header: + column_names = kwargs['column_names'] + del kwargs['column_names'] + tables[sheet.title] = agate.Table(rows, column_names, **kwargs) f.close() @@ -88,6 +103,7 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs): else: return tables.popitem()[1] + def normalize_datetime(dt): if dt.microsecond == 0: return dt @@ -101,17 +117,14 @@ def normalize_datetime(dt): return dt + def has_date_elements(cell): """ Try to use formatting to determine if a cell contains only time info. 
See: http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx """ - if 'd' in cell.number_format or \ - 'y' in cell.number_format: - - return True + return 'd' in cell.number_format or 'y' in cell.number_format - return False agate.Table.from_xlsx = classmethod(from_xlsx) diff --git a/docs/conf.py b/docs/conf.py index 255f713..05d1315 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- # +# flake8: noqa +# # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this @@ -45,16 +47,16 @@ master_doc = 'index' # General information about the project. project = u'agate-excel' -copyright = u'2015, Christopher Groskopf' +copyright = u'2017, Christopher Groskopf' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.2.1' +version = '0.2.3' # The full version, including alpha/beta/rc tags. -release = '0.2.1' +release = '0.2.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/example.py b/example.py index 8b41846..c7a5c27 100755 --- a/example.py +++ b/example.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import agate -import agateexcel +import agateexcel # noqa table = agate.Table.from_xls('examples/test.xls') diff --git a/requirements-py3.txt b/requirements-py3.txt index b09f6c0..cc5825e 100644 --- a/requirements-py3.txt +++ b/requirements-py3.txt @@ -1,5 +1,5 @@ nose>=1.1.2 -tox>=1.3 +tox>=3.1.0 Sphinx>=1.2.2 sphinx_rtd_theme>=0.1.6 wheel>=0.24.0 diff --git a/setup.py b/setup.py index 1d87130..0f020e6 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ install_requires = [ setup( name='agate-excel', - version='0.2.1', + version='0.2.3', description='agate-excel adds read support for Excel files (xls and xlsx) to agate.', long_description=open('README.rst').read(), author='Christopher Groskopf', @@ -26,9 +26,10 @@ setup( 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Multimedia :: Graphics', diff --git a/tests/test_table_xls.py b/tests/test_table_xls.py index 2194455..6d02d74 100644 --- a/tests/test_table_xls.py +++ b/tests/test_table_xls.py @@ -3,33 +3,40 @@ import datetime -try: - import unittest2 as unittest -except ImportError: - import unittest - import agate -import agateexcel +import agateexcel # noqa + class TestXLS(agate.AgateTestCase): def setUp(self): self.rows = ( (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM'), (2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM'), - (None, 'b', None, None, None) + (None, 'b', None, None, None), ) self.column_names = [ - 'number', 'text', 'boolean', 'date', 'datetime' + 
'number', 'text', 'boolean', 'date', 'datetime', + ] + + self.user_provided_column_names = [ + 'alt number', 'alt text', 'alt boolean', 'alt date', 'alt datetime', ] self.column_types = [ agate.Number(), agate.Text(), agate.Boolean(), - agate.Date(), agate.DateTime() + agate.Date(), agate.DateTime(), ] self.table = agate.Table(self.rows, self.column_names, self.column_types) + def test_from_xls_with_column_names(self): + table = agate.Table.from_xls('examples/test.xls', header=False, skip_lines=1, column_names=self.user_provided_column_names ) + + self.assertColumnNames(table, self.user_provided_column_names) + self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime]) + self.assertRows(table, [r.values() for r in self.table.rows]) + def test_from_xls(self): table = agate.Table.from_xls('examples/test.xls') @@ -81,6 +88,18 @@ class TestXLS(agate.AgateTestCase): self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime]) self.assertRows(table, [r.values() for r in self.table.rows]) + def test_header(self): + table = agate.Table.from_xls('examples/test_zeros.xls', header=False) + + self.assertColumnNames(table, ('a', 'b', 'c')) + self.assertColumnTypes(table, [agate.Text, agate.Text, agate.Text]) + self.assertRows(table, [ + ['ordinal', 'binary', 'all_zero'], + ['0.0', '0.0', '0.0'], + ['1.0', '1.0', '0.0'], + ['2.0', '1.0', '0.0'], + ]) + def test_zeros(self): table = agate.Table.from_xls('examples/test_zeros.xls') @@ -89,7 +108,7 @@ class TestXLS(agate.AgateTestCase): self.assertRows(table, [ [0, 0, 0], [1, 1, 0], - [2, 1, 0] + [2, 1, 0], ]) def test_ambiguous_date(self): @@ -98,7 +117,7 @@ class TestXLS(agate.AgateTestCase): self.assertColumnNames(table, ['s']) self.assertColumnTypes(table, [agate.Date]) self.assertRows(table, [ - [datetime.date(1900, 1, 1)] + [datetime.date(1900, 1, 1)], ]) def test_empty(self): @@ -114,5 +133,5 @@ class TestXLS(agate.AgateTestCase): 
self.assertColumnNames(table, ('Country', '2013.0', 'c')) self.assertColumnTypes(table, [agate.Text, agate.Number, agate.Text]) self.assertRows(table, [ - ['Canada', 35160000, 'value'] + ['Canada', 35160000, 'value'], ]) diff --git a/tests/test_table_xlsx.py b/tests/test_table_xlsx.py index b7fb38c..9b56b9b 100644 --- a/tests/test_table_xlsx.py +++ b/tests/test_table_xlsx.py @@ -3,33 +3,40 @@ import datetime -try: - import unittest2 as unittest -except ImportError: - import unittest - import agate -import agateexcel +import agateexcel # noqa + class TestXLSX(agate.AgateTestCase): def setUp(self): self.rows = ( (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM'), (2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM'), - (None, 'b', None, None, None) + (None, 'b', None, None, None), ) self.column_names = [ - 'number', 'text', 'boolean', 'date', 'datetime' + 'number', 'text', 'boolean', 'date', 'datetime', + ] + + self.user_provided_column_names = [ + 'number', 'text', 'boolean', 'date', 'datetime', ] self.column_types = [ agate.Number(), agate.Text(), agate.Boolean(), - agate.Date(), agate.DateTime() + agate.Date(), agate.DateTime(), ] self.table = agate.Table(self.rows, self.column_names, self.column_types) + def test_from_xlsx_with_column_names(self): + table = agate.Table.from_xlsx('examples/test.xlsx', header=False, skip_lines=1, column_names=self.user_provided_column_names) + + self.assertColumnNames(table, self.user_provided_column_names) + self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime]) + self.assertRows(table, [r.values() for r in self.table.rows]) + def test_from_xlsx(self): table = agate.Table.from_xlsx('examples/test.xlsx') @@ -81,13 +88,25 @@ class TestXLSX(agate.AgateTestCase): self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime]) self.assertRows(table, [r.values() for r in self.table.rows]) + def test_header(self): + table = 
agate.Table.from_xls('examples/test_zeros.xls', header=False) + + self.assertColumnNames(table, ('a', 'b', 'c')) + self.assertColumnTypes(table, [agate.Text, agate.Text, agate.Text]) + self.assertRows(table, [ + ['ordinal', 'binary', 'all_zero'], + ['0.0', '0.0', '0.0'], + ['1.0', '1.0', '0.0'], + ['2.0', '1.0', '0.0'], + ]) + def test_ambiguous_date(self): table = agate.Table.from_xlsx('examples/test_ambiguous_date.xlsx') self.assertColumnNames(table, ['s']) self.assertColumnTypes(table, [agate.Date]) self.assertRows(table, [ - [datetime.date(1899, 12, 31)] + [datetime.date(1899, 12, 31)], ]) def test_empty(self): @@ -103,5 +122,5 @@ class TestXLSX(agate.AgateTestCase): self.assertColumnNames(table, ['Country', '2013', 'c']) self.assertColumnTypes(table, [agate.Text, agate.Number, agate.Text]) self.assertRows(table, [ - ['Canada', 35160000, 'value'] + ['Canada', 35160000, 'value'], ]) diff --git a/tox.ini b/tox.ini index c75ecc9..779fba2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py33,py34,py35,pypy +envlist = py27,py34,py35,py36,py37,pypy [testenv] deps= @@ -11,15 +11,19 @@ commands=nosetests deps= {[testenv]deps} -[testenv:py33] +[testenv:py34] deps= {[testenv]deps} -[testenv:py34] +[testenv:py35] deps= {[testenv:py33]deps} -[testenv:py35] +[testenv:py36] +deps= + {[testenv:py33]deps} + +[testenv:py37] deps= {[testenv:py33]deps} -- GitLab