From 7a315a1a4637c5ef393e2f08b8c2025f7b6509aa Mon Sep 17 00:00:00 2001
From: Andrius Merkys <merkys@debian.org>
Date: Wed, 27 Nov 2019 04:22:49 -0500
Subject: [PATCH] New upstream version 0.2.3

---
 .travis.yml              |  4 +++-
 AUTHORS.rst              |  4 ++++
 CHANGELOG.rst            | 17 ++++++++++++++++
 COPYING                  |  2 +-
 MANIFEST.in              |  2 ++
 README.rst               |  4 ----
 agateexcel/table_xls.py  | 37 +++++++++++++++++++++++++---------
 agateexcel/table_xlsx.py | 33 ++++++++++++++++++++----------
 docs/conf.py             |  8 +++++---
 example.py               |  2 +-
 requirements-py3.txt     |  2 +-
 setup.py                 |  5 +++--
 tests/test_table_xls.py  | 43 +++++++++++++++++++++++++++++-----------
 tests/test_table_xlsx.py | 41 ++++++++++++++++++++++++++++----------
 tox.ini                  | 12 +++++++----
 15 files changed, 157 insertions(+), 59 deletions(-)
 mode change 100644 => 100755 agateexcel/table_xlsx.py

diff --git a/.travis.yml b/.travis.yml
index b8823ad..da38eb7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,9 +1,11 @@
+dist: xenial
 language: python
 python:
   - "2.7"
-  - "3.3"
   - "3.4"
   - "3.5"
+  - "3.6"
+  - "3.7"
 # command to install dependencies
 install:
     - if [[ $TRAVIS_PYTHON_VERSION == 3* ]]; then pip install -r requirements-py3.txt; else pip install -r requirements-py2.txt; fi
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 058c201..337e959 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -4,3 +4,7 @@ The following individuals have contributed code to agate-excel:
 * `James McKinney <https://github.com/jpmckinney>`_
 * `Ben Welsh <https://github.com/palewire>`_
 * `Peter M. Landwehr <https://github.com/pmlandwehr>`_
+* `Tim Freund <https://github.com/timfreund>`_
+* `Jani Mikkonen <https://github.com/rasjani>`_
+* `Loïc Corbasson <https://github.com/lcorbasson>`_
+* `Robert Schütz <https://github.com/dotlambda>`_
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 424ae10..72f4df0 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,20 @@
+0.2.3
+-----
+
+* Fix bug in accepting ``column_names`` as keyword argument.
+* Add a ``reset_dimensions`` argument to :meth:`.Table.from_xlsx` to recalculate the data's dimensions, instead of trusting those in the file's properties.
+* Include tests and examples in distribution.
+* agate-excel is now tested against Python 3.6 and 3.7.
+* Drop support for Python 3.3 (end-of-life was September 29, 2017).
+* Add support for openpyxl 2.6.0.
+
+0.2.2 - January 28, 2018
+------------------------
+
+* Add an ``encoding_override`` argument to :meth:`.Table.from_xls` to override the encoding of the input XLS file.
+* Add a ``header`` argument to :meth:`.Table.from_xls` and :meth:`.Table.from_xlsx` to indicate the presence of a header row.
+* Add a ``read_only`` argument to :meth:`.Table.from_xlsx` to allow disabling read-only mode for `some spreadsheets <https://openpyxl.readthedocs.io/en/stable/optimized.html#worksheet-dimensions>`_.
+
 0.2.1 - February 28, 2017
 -------------------------
 
diff --git a/COPYING b/COPYING
index 15aa61e..8f55739 100644
--- a/COPYING
+++ b/COPYING
@@ -1,6 +1,6 @@
 The MIT License
 
-Copyright (c) 2015 Christopher Groskopf and contributors 
+Copyright (c) 2017 Christopher Groskopf and contributors 
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/MANIFEST.in b/MANIFEST.in
index 45eee36..eaee0cc 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,5 @@
 include COPYING
 include AUTHORS.rst
 include README.rst
+recursive-include tests *.py
+graft examples
diff --git a/README.rst b/README.rst
index 91b0427..baffd5e 100644
--- a/README.rst
+++ b/README.rst
@@ -2,10 +2,6 @@
     :target: https://travis-ci.org/wireservice/agate-excel
     :alt: Build status
 
-.. image:: https://img.shields.io/pypi/dw/agate-excel.svg
-    :target: https://pypi.python.org/pypi/agate-excel
-    :alt: PyPI downloads
-
 .. image:: https://img.shields.io/pypi/v/agate-excel.svg
     :target: https://pypi.python.org/pypi/agate-excel
     :alt: Version
diff --git a/agateexcel/table_xls.py b/agateexcel/table_xls.py
index 15a0d49..26a5122 100644
--- a/agateexcel/table_xls.py
+++ b/agateexcel/table_xls.py
@@ -11,7 +11,8 @@ import agate
 import six
 import xlrd
 
-def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
+
+def from_xls(cls, path, sheet=None, skip_lines=0, header=True, encoding_override=None, **kwargs):
     """
     Parse an XLS file.
 
@@ -22,15 +23,17 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
         then the first sheet will be used.
     :param skip_lines:
         The number of rows to skip from the top of the sheet.
+    :param header:
+        If :code:`True`, the first row is assumed to contain column names.
     """
     if not isinstance(skip_lines, int):
         raise ValueError('skip_lines argument must be an int')
 
     if hasattr(path, 'read'):
-        book = xlrd.open_workbook(file_contents=path.read())
+        book = xlrd.open_workbook(file_contents=path.read(), encoding_override=encoding_override)
     else:
         with open(path, 'rb') as f:
-            book = xlrd.open_workbook(file_contents=f.read())
+            book = xlrd.open_workbook(file_contents=f.read(), encoding_override=encoding_override)
 
     multiple = agate.utils.issequence(sheet)
     if multiple:
@@ -48,15 +51,19 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
         else:
             sheet = book.sheet_by_index(0)
 
-        column_names = []
+        if header:
+            offset = 1
+            column_names = []
+        else:
+            offset = 0
+            column_names = None
+
         columns = []
 
         for i in range(sheet.ncols):
             data = sheet.col_values(i)
-            name = six.text_type(data[skip_lines]) or None
-            values = data[skip_lines + 1:]
-            types = sheet.col_types(i)[skip_lines + 1:]
-
+            values = data[skip_lines + offset:]
+            types = sheet.col_types(i)[skip_lines + offset:]
             excel_type = determine_excel_type(types)
 
             if excel_type == xlrd.biffh.XL_CELL_BOOLEAN:
@@ -64,7 +71,10 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
             elif excel_type == xlrd.biffh.XL_CELL_DATE:
                 values = normalize_dates(values, book.datemode)
 
-            column_names.append(name)
+            if header:
+                name = six.text_type(data[skip_lines]) or None
+                column_names.append(name)
+
             columns.append(values)
 
         rows = []
@@ -73,6 +83,11 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
             for i in range(len(columns[0])):
                 rows.append([c[i] for c in columns])
 
+        if 'column_names' in kwargs:
+            if not header:
+                column_names = kwargs['column_names']
+            del kwargs['column_names']
+
         tables[sheet.name] = agate.Table(rows, column_names, **kwargs)
 
     if multiple:
@@ -80,6 +95,7 @@ def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
     else:
         return tables.popitem()[1]
 
+
 def determine_excel_type(types):
     """
     Determine the correct type for a column from a list of cell types.
@@ -96,6 +112,7 @@ def determine_excel_type(types):
     except KeyError:
         return xlrd.biffh.XL_CELL_EMPTY
 
+
 def normalize_booleans(values):
     normalized = []
 
@@ -107,6 +124,7 @@ def normalize_booleans(values):
 
     return normalized
 
+
 def normalize_dates(values, datemode=0):
     """
     Normalize a column of date cells.
@@ -131,4 +149,5 @@ def normalize_dates(values, datemode=0):
 
     return normalized
 
+
 agate.Table.from_xls = classmethod(from_xls)
diff --git a/agateexcel/table_xlsx.py b/agateexcel/table_xlsx.py
old mode 100644
new mode 100755
index f027d32..e3f3c1d
--- a/agateexcel/table_xlsx.py
+++ b/agateexcel/table_xlsx.py
@@ -13,7 +13,9 @@ import six
 
 NULL_TIME = datetime.time(0, 0, 0)
 
-def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
+
+def from_xlsx(cls, path, sheet=None, skip_lines=0, header=True, read_only=True, 
+              reset_dimensions=False, **kwargs):
     """
     Parse an XLSX file.
 
@@ -24,6 +26,11 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
         then the "active" sheet will be used.
     :param skip_lines:
         The number of rows to skip from the top of the sheet.
+    :param header:
+        If :code:`True`, the first row is assumed to contain column names.
+    :param reset_dimensions:
+        If :code:`True`, do not trust the dimensions in the file's properties, 
+        and recalculate them based on the data in the file.
     """
     if not isinstance(skip_lines, int):
         raise ValueError('skip_lines argument must be an int')
@@ -33,7 +40,7 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
     else:
         f = open(path, 'rb')
 
-    book = openpyxl.load_workbook(f, read_only=True, data_only=True)
+    book = openpyxl.load_workbook(f, read_only=read_only, data_only=True)
 
     multiple = agate.utils.issequence(sheet)
     if multiple:
@@ -51,11 +58,14 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
         else:
             sheet = book.active
 
-        column_names = []
+        column_names = None
         rows = []
 
-        for i, row in enumerate(sheet.iter_rows(row_offset=skip_lines)):
-            if i == 0:
+        if reset_dimensions:
+            sheet.reset_dimensions()
+
+        for i, row in enumerate(sheet.iter_rows(min_row=skip_lines + 1)):
+            if i == 0 and header:
                 column_names = [None if c.value is None else six.text_type(c.value) for c in row]
                 continue
 
@@ -79,6 +89,11 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
 
             rows.append(values)
 
+        if 'column_names' in kwargs:
+            if not header:
+                column_names = kwargs['column_names']
+            del kwargs['column_names']
+
         tables[sheet.title] = agate.Table(rows, column_names, **kwargs)
 
     f.close()
@@ -88,6 +103,7 @@ def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
     else:
         return tables.popitem()[1]
 
+
 def normalize_datetime(dt):
     if dt.microsecond == 0:
         return dt
@@ -101,17 +117,14 @@ def normalize_datetime(dt):
 
     return dt
 
+
 def has_date_elements(cell):
     """
     Try to use formatting to determine if a cell contains only time info.
 
     See: http://office.microsoft.com/en-us/excel-help/number-format-codes-HP005198679.aspx
     """
-    if 'd' in cell.number_format or \
-        'y' in cell.number_format:
-
-        return True
+    return 'd' in cell.number_format or 'y' in cell.number_format
 
-    return False
 
 agate.Table.from_xlsx = classmethod(from_xlsx)
diff --git a/docs/conf.py b/docs/conf.py
index 255f713..05d1315 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 #
+# flake8: noqa
+#
 # This file is execfile()d with the current directory set to its containing dir.
 #
 # Note that not all possible configuration values are present in this
@@ -45,16 +47,16 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'agate-excel'
-copyright = u'2015, Christopher Groskopf'
+copyright = u'2017, Christopher Groskopf'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = '0.2.1'
+version = '0.2.3'
 # The full version, including alpha/beta/rc tags.
-release = '0.2.1'
+release = '0.2.3'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/example.py b/example.py
index 8b41846..c7a5c27 100755
--- a/example.py
+++ b/example.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import agate
-import agateexcel
+import agateexcel  # noqa
 
 table = agate.Table.from_xls('examples/test.xls')
 
diff --git a/requirements-py3.txt b/requirements-py3.txt
index b09f6c0..cc5825e 100644
--- a/requirements-py3.txt
+++ b/requirements-py3.txt
@@ -1,5 +1,5 @@
 nose>=1.1.2
-tox>=1.3
+tox>=3.1.0
 Sphinx>=1.2.2
 sphinx_rtd_theme>=0.1.6
 wheel>=0.24.0
diff --git a/setup.py b/setup.py
index 1d87130..0f020e6 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ install_requires = [
 
 setup(
     name='agate-excel',
-    version='0.2.1',
+    version='0.2.3',
     description='agate-excel adds read support for Excel files (xls and xlsx) to agate.',
     long_description=open('README.rst').read(),
     author='Christopher Groskopf',
@@ -26,9 +26,10 @@ setup(
         'Operating System :: OS Independent',
         'Programming Language :: Python',
         'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: Implementation :: CPython',
         'Programming Language :: Python :: Implementation :: PyPy',
         'Topic :: Multimedia :: Graphics',
diff --git a/tests/test_table_xls.py b/tests/test_table_xls.py
index 2194455..6d02d74 100644
--- a/tests/test_table_xls.py
+++ b/tests/test_table_xls.py
@@ -3,33 +3,40 @@
 
 import datetime
 
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest
-
 import agate
-import agateexcel
+import agateexcel  # noqa
+
 
 class TestXLS(agate.AgateTestCase):
     def setUp(self):
         self.rows = (
             (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM'),
             (2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM'),
-            (None, 'b', None, None, None)
+            (None, 'b', None, None, None),
         )
 
         self.column_names = [
-            'number', 'text', 'boolean', 'date', 'datetime'
+            'number', 'text', 'boolean', 'date', 'datetime',
+        ]
+
+        self.user_provided_column_names = [
+            'alt number', 'alt text', 'alt boolean', 'alt date', 'alt datetime',
         ]
 
         self.column_types = [
             agate.Number(), agate.Text(), agate.Boolean(),
-            agate.Date(), agate.DateTime()
+            agate.Date(), agate.DateTime(),
         ]
 
         self.table = agate.Table(self.rows, self.column_names, self.column_types)
 
+    def test_from_xls_with_column_names(self):
+        table = agate.Table.from_xls('examples/test.xls', header=False, skip_lines=1, column_names=self.user_provided_column_names )
+
+        self.assertColumnNames(table, self.user_provided_column_names)
+        self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
+        self.assertRows(table, [r.values() for r in self.table.rows])
+
     def test_from_xls(self):
         table = agate.Table.from_xls('examples/test.xls')
 
@@ -81,6 +88,18 @@ class TestXLS(agate.AgateTestCase):
         self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
         self.assertRows(table, [r.values() for r in self.table.rows])
 
+    def test_header(self):
+        table = agate.Table.from_xls('examples/test_zeros.xls', header=False)
+
+        self.assertColumnNames(table, ('a', 'b', 'c'))
+        self.assertColumnTypes(table, [agate.Text, agate.Text, agate.Text])
+        self.assertRows(table, [
+            ['ordinal', 'binary', 'all_zero'],
+            ['0.0', '0.0', '0.0'],
+            ['1.0', '1.0', '0.0'],
+            ['2.0', '1.0', '0.0'],
+        ])
+
     def test_zeros(self):
         table = agate.Table.from_xls('examples/test_zeros.xls')
 
@@ -89,7 +108,7 @@ class TestXLS(agate.AgateTestCase):
         self.assertRows(table, [
             [0, 0, 0],
             [1, 1, 0],
-            [2, 1, 0]
+            [2, 1, 0],
         ])
 
     def test_ambiguous_date(self):
@@ -98,7 +117,7 @@ class TestXLS(agate.AgateTestCase):
         self.assertColumnNames(table, ['s'])
         self.assertColumnTypes(table, [agate.Date])
         self.assertRows(table, [
-            [datetime.date(1900, 1, 1)]
+            [datetime.date(1900, 1, 1)],
         ])
 
     def test_empty(self):
@@ -114,5 +133,5 @@ class TestXLS(agate.AgateTestCase):
         self.assertColumnNames(table, ('Country', '2013.0', 'c'))
         self.assertColumnTypes(table, [agate.Text, agate.Number, agate.Text])
         self.assertRows(table, [
-            ['Canada', 35160000, 'value']
+            ['Canada', 35160000, 'value'],
         ])
diff --git a/tests/test_table_xlsx.py b/tests/test_table_xlsx.py
index b7fb38c..9b56b9b 100644
--- a/tests/test_table_xlsx.py
+++ b/tests/test_table_xlsx.py
@@ -3,33 +3,40 @@
 
 import datetime
 
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest
-
 import agate
-import agateexcel
+import agateexcel  # noqa
+
 
 class TestXLSX(agate.AgateTestCase):
     def setUp(self):
         self.rows = (
             (1, 'a', True, '11/4/2015', '11/4/2015 12:22 PM'),
             (2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM'),
-            (None, 'b', None, None, None)
+            (None, 'b', None, None, None),
         )
 
         self.column_names = [
-            'number', 'text', 'boolean', 'date', 'datetime'
+            'number', 'text', 'boolean', 'date', 'datetime',
+        ]
+
+        self.user_provided_column_names = [
+            'number', 'text', 'boolean', 'date', 'datetime',
         ]
 
         self.column_types = [
             agate.Number(), agate.Text(), agate.Boolean(),
-            agate.Date(), agate.DateTime()
+            agate.Date(), agate.DateTime(),
         ]
 
         self.table = agate.Table(self.rows, self.column_names, self.column_types)
 
+    def test_from_xlsx_with_column_names(self):
+        table = agate.Table.from_xlsx('examples/test.xlsx', header=False, skip_lines=1, column_names=self.user_provided_column_names)
+
+        self.assertColumnNames(table, self.user_provided_column_names)
+        self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
+        self.assertRows(table, [r.values() for r in self.table.rows])
+
     def test_from_xlsx(self):
         table = agate.Table.from_xlsx('examples/test.xlsx')
 
@@ -81,13 +88,25 @@ class TestXLSX(agate.AgateTestCase):
         self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
         self.assertRows(table, [r.values() for r in self.table.rows])
 
+    def test_header(self):
+        table = agate.Table.from_xls('examples/test_zeros.xls', header=False)
+
+        self.assertColumnNames(table, ('a', 'b', 'c'))
+        self.assertColumnTypes(table, [agate.Text, agate.Text, agate.Text])
+        self.assertRows(table, [
+            ['ordinal', 'binary', 'all_zero'],
+            ['0.0', '0.0', '0.0'],
+            ['1.0', '1.0', '0.0'],
+            ['2.0', '1.0', '0.0'],
+        ])
+
     def test_ambiguous_date(self):
         table = agate.Table.from_xlsx('examples/test_ambiguous_date.xlsx')
 
         self.assertColumnNames(table, ['s'])
         self.assertColumnTypes(table, [agate.Date])
         self.assertRows(table, [
-            [datetime.date(1899, 12, 31)]
+            [datetime.date(1899, 12, 31)],
         ])
 
     def test_empty(self):
@@ -103,5 +122,5 @@ class TestXLSX(agate.AgateTestCase):
         self.assertColumnNames(table, ['Country', '2013', 'c'])
         self.assertColumnTypes(table, [agate.Text, agate.Number, agate.Text])
         self.assertRows(table, [
-            ['Canada', 35160000, 'value']
+            ['Canada', 35160000, 'value'],
         ])
diff --git a/tox.ini b/tox.ini
index c75ecc9..779fba2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py27,py33,py34,py35,pypy
+envlist = py27,py34,py35,py36,py37,pypy
 
 [testenv]
 deps=
@@ -11,15 +11,19 @@ commands=nosetests
 deps=
     {[testenv]deps}
 
-[testenv:py33]
+[testenv:py34]
 deps=
     {[testenv]deps}
 
-[testenv:py34]
+[testenv:py35]
 deps=
     {[testenv:py33]deps}
 
-[testenv:py35]
+[testenv:py36]
+deps=
+    {[testenv:py33]deps}
+
+[testenv:py37]
 deps=
     {[testenv:py33]deps}
 
-- 
GitLab