Commits on Source (6)
Metadata-Version: 1.1
Name: gffutils
Version: 0.9
Version: 0.10.1
Summary: Work with GFF and GTF files in a flexible database framework
Home-page: https://github.com/daler/gffutils
Author: Ryan Dale
......
python-gffutils (0.10.1-1) unstable; urgency=medium
* New upstream version
* Freshen patches, removing two applied upstream
* Remove obsolete field Name from debian/upstream/metadata.
-- Michael R. Crusoe <michael.crusoe@gmail.com> Wed, 01 Jan 2020 12:49:11 +0100
python-gffutils (0.9-2) unstable; urgency=medium
* debhelper 11
......
From: Michael R. Crusoe <michael.crusoe@gmail.com>
Subject: Ensure that tests work post-installation
--- python-gffutils.orig/gffutils/test/test.py
+++ python-gffutils/gffutils/test/test.py
@@ -621,6 +621,7 @@
# Serving test/data folder
served_folder = gffutils.example_filename('')
+ savedir = os.getcwd()
os.chdir(served_folder)
print("Starting SimpleHTTPServer in thread")
@@ -654,6 +655,7 @@
print('Server shutdown.')
httpd.shutdown()
server_thread.join()
+ os.chdir(savedir)
def test_empty_files():
......@@ -6,46 +6,19 @@ Subject: Support newer bedtools version
---
--- python-gffutils.orig/gffutils/iterators.py
+++ python-gffutils/gffutils/iterators.py
@@ -115,6 +115,7 @@
@@ -115,7 +115,6 @@
Subclass for iterating over features provided as a filename
"""
def open_function(self, data):
+ data = os.path.expanduser(data)
- data = os.path.expanduser(data)
if data.endswith('.gz'):
import gzip
return gzip.open(data)
--- python-gffutils.orig/gffutils/test/test.py
+++ python-gffutils/gffutils/test/test.py
@@ -880,7 +880,7 @@
assert len(filelist) == 1, filelist
assert filelist[0].endswith('.gffutils')
- #...and another one for gff. This time, make sure the suffix
+ #...and another one for gff. This time, make sure the suffix
db = gffutils.create_db(
gffutils.example_filename('FBgn0031208.gff'), ':memory:', _keep_tempfiles=True)
filelist = os.listdir(tempdir)
@@ -1006,7 +1006,7 @@
def test_deprecation_handler():
- return
+ return
# TODO: when infer_gene_extent actually gets deprecated, test here.
assert_raises(ValueError, gffutils.create_db,
@@ -1141,7 +1141,7 @@
assert f.attributes['null'][0] == '\x00'
assert f.attributes['comma'][0] == ','
- # Commas indicate
+ # Commas indicate
assert f.attributes['Parent'] == ['A,', 'B%', 'C']
assert str(f) == s
@@ -1174,6 +1174,18 @@
assert db['e']['Note'] == [',']
assert db['f']['Note'] == [',']
@@ -1315,6 +1315,18 @@
g = gffutils.feature.feature_from_line(str(f))
assert g == f
+
+def test_issue_105():
......@@ -62,107 +35,3 @@ Subject: Support newer bedtools version
if __name__ == "__main__":
# this test case fails
#test_attributes_modify()
--- python-gffutils.orig/gffutils/pybedtools_integration.py
+++ python-gffutils/gffutils/pybedtools_integration.py
@@ -2,11 +2,13 @@
Module for integration with pybedtools
"""
+import os
import pybedtools
from pybedtools import featurefuncs
from gffutils import helpers
import six
+
def to_bedtool(iterator):
"""
Convert any iterator into a pybedtools.BedTool object.
@@ -22,7 +24,7 @@
def tsses(db, merge_overlapping=False, attrs=None, attrs_sep=":",
- merge_kwargs=dict(o='distinct', s=True, c=4), as_bed6=False):
+ merge_kwargs=None, as_bed6=False, bedtools_227_or_later=True):
"""
Create 1-bp transcription start sites for all transcripts in the database
and return as a sorted pybedtools.BedTool object pointing to a temporary
@@ -74,13 +76,21 @@
attributes is supplied, e.g. ["gene_id", "transcript_id"], then these
will be joined by `attr_join_sep` and then placed in the name field.
-
attrs_sep: str
If `as_bed6=True` or `merge_overlapping=True`, then use this character
to separate attributes in the name field of the output BED. If also
using `merge_overlapping=True`, you'll probably want this to be
different than `merge_sep` in order to parse things out later.
+ bedtools_227_or_later : bool
+ In version 2.27, BEDTools changed the output for merge. By default,
+ this function expects BEDTools version 2.27 or later, but set this to
+ False to assume the older behavior.
+
+ For testing purposes, the environment variable
+ GFFUTILS_USES_BEDTOOLS_227_OR_LATER is set to either "true" or "false"
+ and is used to override this argument.
+
Examples
--------
@@ -146,7 +156,22 @@
"""
- _merge_kwargs = dict(o='distinct', s=True, c=4)
+ _override = os.environ.get('GFFUTILS_USES_BEDTOOLS_227_OR_LATER', None)
+ if _override is not None:
+ if _override == 'true':
+ bedtools_227_or_later = True
+ elif _override == 'false':
+ bedtools_227_or_later = False
+ else:
+ raise ValueError(
+ "Unknown value for GFFUTILS_USES_BEDTOOLS_227_OR_LATER "
+ "environment variable: {0}".format(_override))
+
+ if bedtools_227_or_later:
+ _merge_kwargs = dict(o='distinct', s=True, c='4,5,6')
+ else:
+ _merge_kwargs = dict(o='distinct', s=True, c='4')
+
if merge_kwargs is not None:
_merge_kwargs.update(merge_kwargs)
@@ -195,18 +220,18 @@
x = x.each(to_bed).saveas()
if merge_overlapping:
-
- def fix_merge(f):
- f = featurefuncs.extend_fields(f, 6)
- return pybedtools.Interval(
- f.chrom,
- f.start,
- f.stop,
- f[4],
- '.',
- f[3])
-
- x = x.merge(**_merge_kwargs).each(fix_merge).saveas()
-
+ if bedtools_227_or_later:
+ x = x.merge(**_merge_kwargs)
+ else:
+ def fix_merge(f):
+ f = featurefuncs.extend_fields(f, 6)
+ return pybedtools.Interval(
+ f.chrom,
+ f.start,
+ f.stop,
+ f[4],
+ '.',
+ f[3])
+ x = x.merge(**_merge_kwargs).saveas().each(fix_merge).saveas()
return x
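For orientation, a minimal usage sketch of the patched tsses() behaviour follows (hedged: 'annotation.db' is a hypothetical, pre-built database file, and the actual BED output depends on the installed BEDTools version):

import os
import gffutils
from gffutils.pybedtools_integration import tsses

db = gffutils.FeatureDB('annotation.db')   # hypothetical database

# State the expected BEDTools behaviour explicitly...
tss = tsses(db, merge_overlapping=True, bedtools_227_or_later=True)

# ...or let the environment variable used by the test suite override it.
os.environ['GFFUTILS_USES_BEDTOOLS_227_OR_LATER'] = 'false'
tss_old = tsses(db, merge_overlapping=True)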
From 7ecd042d46d88ea6f4e1baabaa4421bc0954f284 Mon Sep 17 00:00:00 2001
From: Abishek <jesuisabhishek@gmail.com>
Date: Fri, 22 Feb 2019 01:46:07 +0530
Subject: [PATCH] "raise StopIteration" to "return"
---
gffutils/iterators.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- python-gffutils.orig/gffutils/iterators.py
+++ python-gffutils/gffutils/iterators.py
@@ -130,7 +130,7 @@
self.current_item_number = i
if line == '##FASTA' or line.startswith('>'):
- raise StopIteration
+ return
if line.startswith('##'):
self._directive_handler(line)
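The patch above is needed because PEP 479 (enforced from Python 3.7) turns a StopIteration raised inside a generator body into a RuntimeError; a plain return ends the generator cleanly. A standalone illustration, not gffutils code:

def lines_until_fasta(lines):
    for line in lines:
        if line == '##FASTA':
            # Raising StopIteration here would surface as
            # "RuntimeError: generator raised StopIteration" on Python 3.7+.
            return          # ends the generator cleanly instead
        yield line

print(list(lines_until_fasta(['a', 'b', '##FASTA', 'c'])))   # ['a', 'b']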
py37
newer_bedtools
flexible_tests
python3
......@@ -7,4 +7,3 @@ Registry:
Entry: GFFutils
- Name: conda:bioconda
Entry: gffutils
Name: gffutils
Metadata-Version: 1.1
Name: gffutils
Version: 0.9
Version: 0.10.1
Summary: Work with GFF and GTF files in a flexible database framework
Home-page: https://github.com/daler/gffutils
Author: Ryan Dale
......
......@@ -16,9 +16,9 @@ gffutils/feature.py
gffutils/gffwriter.py
gffutils/helpers.py
gffutils/inspect.py
gffutils/inspection.py
gffutils/interface.py
gffutils/iterators.py
gffutils/merge_criteria.py
gffutils/parser.py
gffutils/pybedtools_integration.py
gffutils/version.py
......@@ -36,8 +36,11 @@ gffutils/test/feature_test.py
gffutils/test/helpers_test.py
gffutils/test/parser_test.py
gffutils/test/performance_test.py
gffutils/test/synth_test_base.py
gffutils/test/test.py
gffutils/test/test_biopython_integration.py
gffutils/test/test_merge.py
gffutils/test/test_merge_all.py
gffutils/test/data/F3-unique-3.v2.gff
gffutils/test/data/FBgn0031208.gff
gffutils/test/data/FBgn0031208.gtf
......@@ -67,6 +70,7 @@ gffutils/test/data/mouse_extra_comma.gff3
gffutils/test/data/ncbi_gff3.txt
gffutils/test/data/nonascii
gffutils/test/data/random-chr.gff
gffutils/test/data/synthetic.gff3
gffutils/test/data/unsanitized.gff
gffutils/test/data/wormbase_gff2.txt
gffutils/test/data/wormbase_gff2_alt.txt
\ No newline at end of file
pyfaidx
six
argh
argcomplete
pyfaidx>=0.5.5.2
six>=1.12.0
argh>=0.26.2
argcomplete>=1.9.4
simplejson
import six
import collections
try:
collectionsAbc = collections.abc
except AttributeError:
collectionsAbc = collections
from gffutils import constants
# collections.MutableMapping is apparently the best way to provide dict-like
# interface (http://stackoverflow.com/a/3387975)
class Attributes(collections.MutableMapping):
class Attributes(collectionsAbc.MutableMapping):
def __init__(self, *args, **kwargs):
"""
An Attributes object acts much like a dictionary. However, values are
......
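A small sketch of the dict-like interface that the collections.abc change preserves (assuming the usual gffutils convention, visible in the test changes above, that attribute values are stored as lists of strings, and that the class lives in gffutils/attributes.py):

from gffutils.attributes import Attributes   # module path assumed

attrs = Attributes()
attrs['ID'] = ['gene1']                  # values are lists of strings
attrs['Parent'] = ['mRNA1', 'mRNA2']
print('Parent' in attrs)                 # True: MutableMapping gives dict behaviour
print(list(attrs.keys()))                # ['ID', 'Parent']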
......@@ -53,6 +53,7 @@ OFFSETS = [
# for BED (0-based, half-open) or GFF (1-based, closed intervals)
COORD_OFFSETS = {'bed': 0, 'gff': 1}
MAX_CHROM_SIZE = 2 ** 29
def bins(start, stop, fmt='gff', one=True):
"""
......@@ -72,6 +73,15 @@ def bins(start, stop, fmt='gff', one=True):
fmt : 'gff' | 'bed'
This specifies 1-based start coords (gff) or 0-based start coords (bed)
"""
# For very large coordinates, return 1 which is "somewhere on the
# chromosome".
if start >= MAX_CHROM_SIZE or stop >= MAX_CHROM_SIZE:
if one:
return 1
else:
return set([1])
# Jump to highest resolution bin that will fit these coords (depending on
# whether we have a BED or GFF-style coordinate).
#
......@@ -114,6 +124,7 @@ def bins(start, stop, fmt='gff', one=True):
# larger bin size)
start >>= NEXT_SHIFT
stop >>= NEXT_SHIFT
return bins
......@@ -214,6 +225,12 @@ def test():
assert bins(1, -1000, one=True) == 1
assert bins(1, -1000, one=False) == set([1])
# Juuuust fits inside the max chrom size
assert bins(536870910, 536870911, one=True) == 8776
# Too big; falls back to 1.
assert bins(536870911, 536870912, one=True) == 1
if __name__ == "__main__":
print_bin_sizes()
test()
......@@ -5,7 +5,6 @@ objects.
import six
try:
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
except ImportError:
import warnings
warnings.warn("BioPython must be installed to use this module")
......@@ -79,4 +78,4 @@ def from_seqfeature(s, **kwargs):
attributes.pop('seqid', '.')
attributes.pop('frame', '.')
return Feature(seqid, source, featuretype, start, stop, score, strand,
frame, attributes, **kwargs)
frame, attributes, id=id, **kwargs)
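A hedged round-trip sketch for the id fix above (assumes BioPython is installed and that the companion to_seqfeature converter lives in the same module; `feat` stands for any gffutils.Feature, e.g. one pulled from a FeatureDB):

from gffutils.biopython_integration import to_seqfeature, from_seqfeature

def round_trip(feat):
    """Convert a gffutils.Feature to a Bio.SeqFeature and back."""
    seq_feature = to_seqfeature(feat)
    return from_seqfeature(seq_feature)

# With id=id now passed through, round_trip(feat).id should equal feat.id.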
......@@ -48,7 +48,7 @@ def deprecation_handler(kwargs):
class _DBCreator(object):
def __init__(self, data, dbfn, force=False, verbose=False, id_spec=None,
merge_strategy='merge', checklines=10, transform=None,
merge_strategy='error', checklines=10, transform=None,
force_dialect_check=False, from_string=False, dialect=None,
default_encoding='utf-8',
disable_infer_genes=False,
......@@ -97,7 +97,6 @@ class _DBCreator(object):
self.disable_infer_genes = disable_infer_genes
self.disable_infer_transcripts = disable_infer_transcripts
self._autoincrements = collections.defaultdict(int)
if force:
if os.path.exists(dbfn):
os.unlink(dbfn)
......@@ -124,7 +123,10 @@ class _DBCreator(object):
force_dialect_check=force_dialect_check, from_string=from_string,
dialect=dialect
)
if '_autoincrements' in kwargs:
self._autoincrements = kwargs['_autoincrements']
else:
self._autoincrements = collections.defaultdict(int)
def set_verbose(self, verbose=None):
if verbose == 'debug':
......@@ -207,7 +209,8 @@ class _DBCreator(object):
Raise error
"warning"
Log a warning
Log a warning, which indicates that all future instances of the
same ID will be ignored
"merge":
Combine old and new attributes -- but only if everything else
......@@ -248,8 +251,8 @@ class _DBCreator(object):
% ([i.id for i in self._candidate_merges(f)]))
# If force_merge_fields was provided, don't pay attention to these
# fields if they're different. We are assuming attributes will be
# different, hence the [:-1]
# fields if they're different. We are assuming the attributes field
# will be different, hence the [:-1]
_gffkeys_to_check = list(
set(constants._gffkeys[:-1])
.difference(self.force_merge_fields))
......@@ -486,7 +489,7 @@ class _DBCreator(object):
c.execute('CREATE INDEX seqidstartendstrand ON features (seqid, start, end, strand)')
# speeds computation 1000x in some cases
logger.info("Running ANALYSE features")
logger.info("Running ANALYZE features")
c.execute('ANALYZE features')
self.conn.commit()
......@@ -505,6 +508,7 @@ class _DBCreator(object):
self._update_relations()
self._finalize()
# TODO: not sure this is used anywhere
def update(self, iterator):
self._populate_from_lines(iterator)
self._update_relations()
......@@ -645,7 +649,7 @@ class _GFFDBCreator(_DBCreator):
else:
suffix = '.gffutils'
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix).name
fout = open(tmp, 'w')
with open(tmp, 'w') as fout:
# Here we look for "grandchildren" -- for each ID, get the child
# (parenthetical subquery below); then for each of those get *its*
......@@ -662,10 +666,10 @@ class _GFFDBCreator(_DBCreator):
''', tuple(parent))
for grandchild in c2:
fout.write('\t'.join((parent[0], grandchild[0])) + '\n')
fout.close()
def relations_generator():
for line in open(fout.name):
with open(fout.name) as fin:
for line in fin:
parent, child = line.strip().split('\t')
yield dict(parent=parent, child=child, level=2)
......@@ -720,7 +724,7 @@ class _GTFDBCreator(_DBCreator):
warnings.warn(
"It appears you have a transcript feature in your GTF "
"file. You may want to use the "
"`disable_infer_transcripts` "
"`disable_infer_transcripts=True` "
"option to speed up database creation")
elif (
f.featuretype == 'gene' and
......@@ -729,7 +733,7 @@ class _GTFDBCreator(_DBCreator):
warnings.warn(
"It appears you have a gene feature in your GTF "
"file. You may want to use the "
"`disable_infer_genes` "
"`disable_infer_genes=True` "
"option to speed up database creation")
lines_seen = i + 1
......@@ -838,9 +842,9 @@ class _GTFDBCreator(_DBCreator):
suffix = self._keep_tempfiles
else:
suffix = '.gffutils'
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix).name
fout = open(tmp, 'w')
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix).name
with open(tmp, 'w') as fout:
self._tmpfile = tmp
# This takes some explanation...
......@@ -959,15 +963,14 @@ class _GTFDBCreator(_DBCreator):
last_gene_id = gene_id
n_features += 1
fout.close()
def derived_feature_generator():
"""
Generator of items from the file that was just created...
"""
keys = ['parent', 'seqid', 'start', 'end', 'strand',
'featuretype', 'bin', 'attributes']
for line in open(fout.name):
with open(fout.name) as fin:
for line in fin:
d = dict(list(zip(keys, line.strip().split('\t'))))
d.pop('parent')
d['score'] = '.'
......@@ -1009,7 +1012,8 @@ class _GTFDBCreator(_DBCreator):
if not self._keep_tempfiles:
os.unlink(fout.name)
# TODO: recreate indexes?
# TODO: recreate indexes? Typically the _finalize() method will be
# called after this one, and indexes are created in _finalize().
def create_db(data, dbfn, id_spec=None, force=False, verbose=False,
......
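The clarified warnings above spell out the exact keywords; a hedged create_db sketch with hypothetical file names (note the diff also changes _DBCreator's internal merge_strategy default from 'merge' to 'error'):

import gffutils

# 'annotation.gtf' already contains gene and transcript features, so
# inference can be switched off for a large speed-up.
db = gffutils.create_db(
    'annotation.gtf', 'annotation.db',
    disable_infer_genes=True,
    disable_infer_transcripts=True,
    merge_strategy='merge',   # be explicit rather than relying on the default
    force=True,               # overwrite an existing database file
)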
from pyfaidx import Fasta
import six
import simplejson
import simplejson as json
from gffutils import constants
from gffutils import helpers
from gffutils import parser
......@@ -145,7 +145,7 @@ class Feature(object):
attributes = helpers._unjsonify(attributes, isattributes=True)
# it's a string but not JSON: assume original attributes string.
except simplejson.JSONDecodeError:
except json.JSONDecodeError:
# But Feature.attributes is still a dict
attributes, _dialect = parser._split_keyvals(attributes)
......@@ -159,7 +159,7 @@ class Feature(object):
if isinstance(extra, six.string_types):
try:
extra = helpers._unjsonify(extra)
except simplejson.JSONDecodeError:
except json.JSONDecodeError:
extra = extra.split('\t')
self.seqid = seqid
......
import copy
import sys
import os
import simplejson
import simplejson as json
import time
import tempfile
import six
......@@ -256,16 +256,16 @@ def _bin_from_dict(d):
def _jsonify(x):
"""Use most compact form of JSON"""
if isinstance(x, dict_class):
return simplejson.dumps(x._d, separators=(',', ':'))
return simplejson.dumps(x, separators=(',', ':'))
return json.dumps(x._d, separators=(',', ':'))
return json.dumps(x, separators=(',', ':'))
def _unjsonify(x, isattributes=False):
"""Convert JSON string to an ordered defaultdict."""
if isattributes:
obj = simplejson.loads(x)
obj = json.loads(x)
return dict_class(obj)
return simplejson.loads(x)
return json.loads(x)
def _feature_to_fields(f, jsonify=True):
......@@ -318,7 +318,7 @@ def merge_attributes(attr1, attr2):
"""
new_d = copy.deepcopy(attr1)
new_d.update(attr2)
new_d.update(copy.deepcopy(attr2))
#all of attr2 key : values just overwrote attr1, fix it
for k, v in new_d.items():
......@@ -330,7 +330,7 @@ def merge_attributes(attr1, attr2):
if not isinstance(v, list):
v = [v]
new_d[k].extend(v)
return dict((k, list(set(v))) for k, v in new_d.items())
return dict((k, sorted(set(v))) for k, v in new_d.items())
def dialect_compare(dialect1, dialect2):
......
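The switch from list(set(v)) to sorted(set(v)) makes merged attribute values deterministic across runs; a small sketch (assuming merge_attributes accepts plain dicts of lists, as the attribute machinery uses internally):

from gffutils import helpers

a = {'gene_id': ['g2'], 'source_tag': ['x']}
b = {'gene_id': ['g1', 'g2']}

merged = helpers.merge_attributes(a, b)
# Previously the order of ['g1', 'g2'] depended on set iteration order;
# with sorted() the output is stable:
print(merged)   # {'gene_id': ['g1', 'g2'], 'source_tag': ['x']}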
from gffutils import iterators
from gffutils import interface
from collections import Counter
import sys
def inspect(data, look_for=['featuretype', 'chrom', 'attribute_keys',
'feature_count'], limit=None, verbose=True):
"""
Inspect a GFF or GTF data source.
This function is useful for figuring out the different featuretypes found
in a file (for potential removal before creating a FeatureDB).
Returns a dictionary with a key for each item in `look_for` and
a corresponding value that is a dictionary of how many of each unique item
were found.
There will always be a `feature_count` key, indicating how many features
were looked at (if `limit` is provided, then `feature_count` will be the
same as `limit`).
For example, if `look_for` is ['chrom', 'featuretype'], then the result
will be a dictionary like::
{
'chrom': {
'chr1': 500,
'chr2': 435,
'chr3': 200,
...
...
},
'featuretype': {
'gene': 150,
'exon': 324,
...
},
'feature_count': 5000
}
Parameters
----------
data : str, FeatureDB instance, or iterator of Features
If `data` is a string, assume it's a GFF or GTF filename. If it's
a FeatureDB instance, then its `all_features()` method will be
automatically called. Otherwise, assume it's an iterable of Feature
objects.
look_for : list
List of things to keep track of. Options are:
- any attribute of a Feature object, such as chrom, source, start,
stop, strand.
- "attribute_keys", which will look at all the individual
attribute keys of each feature
limit : int
Number of features to look at. Default is no limit.
verbose : bool
Report how many features have been processed.
Returns
-------
dict
"""
results = {}
obj_attrs = []
for i in look_for:
if i not in ['attribute_keys', 'feature_count']:
obj_attrs.append(i)
results[i] = Counter()
attr_keys = 'attribute_keys' in look_for
d = iterators.DataIterator(data)
feature_count = 0
for f in d:
if verbose:
sys.stderr.write('\r%s features inspected' % feature_count)
sys.stderr.flush()
for obj_attr in obj_attrs:
results[obj_attr].update([getattr(f, obj_attr)])
if attr_keys:
results['attribute_keys'].update(f.attributes.keys())
feature_count += 1
if limit and feature_count == limit:
break
new_results = {}
for k, v in results.items():
new_results[k] = dict(v)
new_results['feature_count'] = feature_count
return new_results
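A hedged usage sketch for the inspect() helper above ('annotation.gff3' is a hypothetical file):

from gffutils.inspect import inspect

report = inspect('annotation.gff3',
                 look_for=['featuretype', 'chrom', 'attribute_keys'],
                 limit=1000, verbose=False)

print(report['feature_count'])   # up to 1000 features examined
print(report['featuretype'])     # e.g. {'gene': 150, 'exon': 324, ...}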
import collections
import os
import six
import sqlite3
......@@ -6,10 +7,62 @@ import warnings
from gffutils import bins
from gffutils import helpers
from gffutils import constants
from gffutils import merge_criteria as mc
from gffutils.feature import Feature
from gffutils.exceptions import FeatureNotFoundError
def assign_child(parent, child):
"""
Helper for add_relation()
Sets 'Parent' attribute to parent['ID']
Parameters
----------
parent : Feature
Parent Feature
child : Feature
Child Feature
Returns
-------
Child Feature
"""
child.attributes['Parent'] = parent['ID']
return child
# Reusable constant for FeatureDB.merge()
no_children = tuple()
def _finalize_merge(feature, feature_children):
"""
Helper for FeatureDB.merge() to update source and assign children property
Parameters
----------
feature : Feature
feature to finalise
feature_children
list of children to assign
Returns
-------
feature, modified
"""
if len(feature_children) > 1:
feature.source = ','.join(set(child.source for child in feature_children))
feature.children = feature_children
else:
feature.children = no_children
return feature
class FeatureDB(object):
# docstring to be filled in for methods that call out to
# helpers.make_query()
......@@ -147,7 +200,7 @@ class FeatureDB(object):
'''
SELECT base, n FROM autoincrements
''')
self._autoincrements = dict(c)
self._autoincrements = collections.defaultdict(int, c)
self.set_pragmas(pragmas)
......@@ -604,17 +657,15 @@ class FeatureDB(object):
# Only use bins if we have defined boundaries and completely_within is
# True. Otherwise you can't know how far away a feature stretches
# (which means bins are not computable ahead of time)
_bin_clause = ''
if (start is not None) and (end is not None) and completely_within:
if start <= bins.MAX_CHROM_SIZE and end <= bins.MAX_CHROM_SIZE:
_bins = list(bins.bins(start, end, one=False))
# See issue #45
if len(_bins) < 900:
_bin_clause = ' or ' .join(['bin = ?' for _ in _bins])
_bin_clause = 'AND ( %s )' % _bin_clause
args += _bins
else:
_bin_clause = ''
else:
_bin_clause = ''
query = ' '.join([
constants._SELECT,
......@@ -662,11 +713,9 @@ class FeatureDB(object):
Providing N features will return N - 1 new features.
This method purposefully does *not* do any merging or sorting of
coordinates, so you may want to use :meth:`FeatureDB.merge` first.
The new features' attributes will be a merge of the neighboring
features' attributes. This is useful if you have provided a list of
exons; the introns will then retain the transcript and/or gene parents.
coordinates, so you may want to use :meth:`FeatureDB.merge` first, or
when selecting features use the `order_by` kwarg, e.g.,
`db.features_of_type('gene', order_by=('seqid', 'start'))`.
Parameters
----------
......@@ -678,6 +727,14 @@ class FeatureDB(object):
then the featuretypes will be constructed from the neighboring
features, e.g., `inter_exon_exon`.
merge_attributes : bool
If True, new features' attributes will be a merge of the neighboring
features' attributes. This is useful if you have provided a list of
exons; the introns will then retain the transcript and/or gene
parents as a single item. Otherwise, if False, the attribute will
be a comma-separated list of values, potentially listing the same
gene ID twice.
attribute_func : callable or None
If None, then nothing special is done to the attributes. If
callable, then the callable accepts two attribute dictionaries and
......@@ -705,9 +762,21 @@ class FeatureDB(object):
if new_featuretype is None:
new_featuretype = 'inter_%s_%s' % (
last_feature.featuretype, f.featuretype)
assert last_feature.strand == f.strand
assert last_feature.chrom == f.chrom
strand = last_feature.strand
if last_feature.strand != f.strand:
new_strand = '.'
else:
new_strand = f.strand
if last_feature.chrom != f.chrom:
# We've moved to a new chromosome. For example, if we're
# getting intergenic regions from all genes, they will be on
# different chromosomes. We still assume sorted features, but
# don't complain if they're on different chromosomes -- just
# move on.
last_feature = f
continue
strand = new_strand
chrom = last_feature.chrom
# Shrink
......@@ -748,6 +817,7 @@ class FeatureDB(object):
dialect = None
yield self._feature_returner(**fields)
interfeature_start = f.stop
last_feature = f
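Following the docstring's advice to hand interfeatures() pre-sorted features, a hedged sketch ('annotation.db' is a hypothetical database):

import gffutils

db = gffutils.FeatureDB('annotation.db')

genes = db.features_of_type('gene', order_by=('seqid', 'start'))
for intergenic in db.interfeatures(genes,
                                   new_featuretype='intergenic_region',
                                   merge_attributes=True):
    print(intergenic)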
def delete(self, features, make_backup=True, **kwargs):
"""
......@@ -797,7 +867,19 @@ class FeatureDB(object):
def update(self, data, make_backup=True, **kwargs):
"""
Update database with features in `data`.
Update the on-disk database with features in `data`.
WARNING: If you used any non-default kwargs for gffutils.create_db when
creating the database in the first place (especially
`disable_infer_transcripts` or `disable_infer_genes`) then you should
use those same arguments here.
The returned object is the same FeatureDB, but since it is pointing to
the same database and that has been just updated, the new features can
now be accessed.
Parameters
----------
data : str, iterable, FeatureDB instance
If FeatureDB, all data will be used. If string, assume it's
......@@ -810,15 +892,15 @@ class FeatureDB(object):
makes a copy of the existing database and saves it with a .bak
extension.
Notes
-----
Other kwargs are used in the same way as in gffutils.create_db; see the
help for that function for details.
kwargs :
Additional kwargs are passed to gffutils.create_db; see the help
for that function for details.
Returns
-------
FeatureDB with updated features.
Returns self but with the underlying database updated.
"""
from gffutils import create
from gffutils import iterators
if make_backup:
......@@ -833,6 +915,7 @@ class FeatureDB(object):
# Handle all sorts of input
data = iterators.DataIterator(data, **_iterator_kwargs)
kwargs['_autoincrements'] = self._autoincrements
if self.dialect['fmt'] == 'gtf':
if 'id_spec' not in kwargs:
......@@ -849,10 +932,18 @@ class FeatureDB(object):
else:
raise ValueError
peek, data._iter = iterators.peek(data._iter, 1)
if len(peek) == 0: return db # If the file is empty then do nothing
db._populate_from_lines(data)
db._update_relations()
# Note that the autoincrements gets updated here
db._finalize()
return db
# Read it back in directly from the stored autoincrements table
self._autoincrements.update(db._autoincrements)
return self
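A hedged sketch of the clarified update() contract: repeat the same non-default creation kwargs and treat the return value as the same, now-updated FeatureDB (hypothetical file names):

import gffutils

db = gffutils.FeatureDB('annotation.db')

# The database was originally built with inference disabled, so the same
# kwargs are repeated here, as the docstring warns.
db = db.update('new_features.gtf',
               disable_infer_genes=True,
               disable_infer_transcripts=True,
               make_backup=True)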
def add_relation(self, parent, child, level, parent_func=None,
child_func=None):
......@@ -1000,8 +1091,11 @@ class FeatureDB(object):
):
yield intron
def merge(self, features, ignore_strand=False):
def _old_merge(self, features, ignore_strand=False):
"""
DEPRECATED, only retained here for backwards compatibility. Please use
merge().
Merge overlapping features together.
Parameters
......@@ -1095,6 +1189,197 @@ class FeatureDB(object):
attributes='')
yield self._feature_returner(**merged_feature)
def merge(self, features,
merge_criteria=(mc.seqid, mc.overlap_end_inclusive, mc.strand, mc.feature_type),
multiline=False):
"""
Merge features matching criteria together
Returned Features have a special property called 'children' that is
a list of the component features. This only exists for the lifetime of
the Feature instance.
Parameters
----------
features : iterable
Iterable of Feature instances to merge
merge_criteria : list
List of merge criteria callbacks. All must evaluate to True in
order for a feature to be merged. See notes below on callback
signature.
multiline : bool
True to emit multiple features with the same ID attribute, False
otherwise.
Returns
-------
Generator yielding merged Features
Notes
-----
See the `gffutils.merge_criteria` module (imported here as `mc`) for
existing callback functions. For writing custom callbacks, functions
must have the following signature::
callback(
acc: gffutils.Feature,
cur: gffutils.Feature,
components: [gffutils.Feature]
) -> bool
Where:
- `acc`: current accumulated feature
- `cur`: candidate feature to merge
- `components`: list of features that compose acc
The function should return True to merge `cur` into `acc`, False to set
`cur` to `acc` (that is, start a new merged feature).
If the merge criteria allow different feature types, then the merged
features should have their featuretype property reassigned to a more
specific ontology value.
"""
if not isinstance(merge_criteria, list):
try:
merge_criteria = list(merge_criteria)
except TypeError:
merge_criteria = [merge_criteria]
# To start, we create a merged feature of just the first feature.
features = iter(features)
last_id = None
current_merged = None
feature_children = []
for feature in features:
if current_merged is None:
if all(criteria(feature, feature, feature_children) for criteria in merge_criteria):
current_merged = feature
feature_children = [feature]
else:
yield _finalize_merge(feature, no_children)
last_id = None
continue
if len(feature_children) == 0: # current_merged is last feature and unchecked
if all(criteria(current_merged, current_merged, feature_children) for criteria in merge_criteria):
feature_children.append(current_merged)
else:
yield _finalize_merge(current_merged, no_children)
current_merged = feature
last_id = None
continue
if all(criteria(current_merged, feature, feature_children) for criteria in merge_criteria):
# Criteria satisfied, merge
# TODO Test multiline records and iron out the following code
# if multiline and (feature.start > current_merged.end + 1 or feature.end + 1 < current_merged.start):
# # Feature is possibly multiline (discontiguous), keep ID but start new record
# yield _finalize_merge(current_merged, feature_children)
# current_merged = feature
# feature_children = [feature]
if len(feature_children) == 1:
# Current merged is only child and merge is going to occur, make copy
current_merged = vars(current_merged).copy()
del current_merged['attributes']
del current_merged['extra']
del current_merged['dialect']
del current_merged['keep_order']
del current_merged['sort_attribute_values']
current_merged = self._feature_returner(**current_merged)
if not last_id:
# Generate unique ID for new Feature
self._autoincrements[current_merged.featuretype] += 1
last_id = current_merged.featuretype + '_' + str(
self._autoincrements[current_merged.featuretype])
current_merged['ID'] = last_id
current_merged.id = last_id
feature_children.append(feature)
# Set mismatched properties to ambiguous values
if feature.seqid not in current_merged.seqid.split(','): current_merged.seqid += ',' + feature.seqid
if feature.strand != current_merged.strand: current_merged.strand = '.'
if feature.frame != current_merged.frame: current_merged.frame = '.'
if feature.featuretype != current_merged.featuretype: current_merged.featuretype = "sequence_feature"
if feature.start < current_merged.start:
# Extends prior, so set a new start position
current_merged.start = feature.start
if feature.end > current_merged.end:
# Extends further, so set a new stop position
current_merged.end = feature.end
else:
yield _finalize_merge(current_merged, feature_children)
current_merged = feature
feature_children = []
last_id = None
if current_merged:
yield _finalize_merge(current_merged, feature_children)
def merge_all(self,
merge_order=('seqid', 'featuretype', 'strand', 'start'),
merge_criteria=(mc.seqid, mc.overlap_end_inclusive, mc.strand, mc.feature_type),
featuretypes_groups=(None,),
exclude_components=False):
"""
Merge all features in the database according to the given criteria.
Features that are merged become children of the new merged record,
and the resulting records are added to the database.
Parameters
----------
merge_order : list
Ordered list of columns with which to group features before evaluating criteria
merge_criteria : list
List of merge criteria callbacks. See merge().
featuretypes_groups : list
iterable of sets of featuretypes to merge together
exclude_components : bool
If True, child features are removed from the database; if False, they are kept and linked as children of the merged record.
Returns
-------
list of merged features
"""
if not len(featuretypes_groups):
# Can't be empty
featuretypes_groups = (None,)
result_features = []
# Merge features per featuregroup
for featuregroup in featuretypes_groups:
for merged in self.merge(self.all_features(featuretype=featuregroup, order_by=merge_order),
merge_criteria=merge_criteria):
# If feature is result of merge
if merged.children:
self._insert(merged, self.conn.cursor())
if exclude_components:
# Remove child features from DB
self.delete(merged.children)
else:
# Add child relations to DB
for child in merged.children:
self.add_relation(merged, child, 1, child_func=assign_child)
result_features.append(merged)
else:
pass # Do nothing, feature is already in DB
return result_features
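A hedged merge_all() sketch (assumes a pre-built database and that a list is accepted as one featuretypes group, per the docstring's "iterable of sets of featuretypes"):

import gffutils

db = gffutils.FeatureDB('annotation.db')   # hypothetical database
merged_exons = db.merge_all(
    featuretypes_groups=(['exon'],),   # merge exons only
    exclude_components=False,          # keep component features as children
)
print(len(merged_exons), 'merged records written back to the database')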
def children_bp(self, feature, child_featuretype='exon', merge=False,
ignore_strand=False):
"""
......
......@@ -115,6 +115,7 @@ class _FileIterator(_BaseIterator):
Subclass for iterating over features provided as a filename
"""
def open_function(self, data):
data = os.path.expanduser(data)
if data.endswith('.gz'):
import gzip
return gzip.open(data)
......@@ -130,7 +131,7 @@ class _FileIterator(_BaseIterator):
self.current_item_number = i
if line == '##FASTA' or line.startswith('>'):
raise StopIteration
return
if line.startswith('##'):
self._directive_handler(line)
......
"""
Merge criteria used by FeatureDB merge() and merge_all()
"""
from gffutils.feature import Feature
def seqid(acc, cur, components):
return cur.seqid == acc.seqid
def strand(acc, cur, components):
return acc.strand == cur.strand
def feature_type(acc, cur, components):
return acc.featuretype == cur.featuretype
def exact_coordinates_only(acc, cur, components):
return cur.start == acc.start and cur.stop == acc.end
def overlap_end_inclusive(acc, cur, components):
return acc.start <= cur.start <= acc.end + 1
def overlap_start_inclusive(acc, cur, components):
return acc.start <= cur.end + 1 <= acc.end + 1
def overlap_any_inclusive(acc, cur, components):
return acc.start <= cur.start <= acc.end + 1 or acc.start <= cur.end + 1 <= acc.end + 1
def overlap_end_threshold(threshold):
def partial(acc, cur, components):
return acc.start <= cur.start <= acc.end + threshold
return partial
def overlap_start_threshold(threshold):
def partial(acc, cur, components):
return acc.start - threshold <= cur.end + 1 <= acc.end + 1
return partial
def overlap_any_threshold(threshold):
def partial(acc, cur, components):
return acc.start - threshold <= cur.end + 1 <= acc.end + 1 or acc.start <= cur.start <= acc.end + threshold
return partial
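To close, a hedged sketch tying the merge_criteria module back to FeatureDB.merge(): the built-in callbacks compose with a custom one as long as it follows the documented (acc, cur, components) -> bool signature ('annotation.db' is hypothetical):

import gffutils
from gffutils import merge_criteria as mc

db = gffutils.FeatureDB('annotation.db')

def same_source(acc, cur, components):
    # Custom criterion: only merge features sharing the source column.
    return acc.source == cur.source

exons = db.features_of_type('exon', order_by=('seqid', 'strand', 'start'))
for merged in db.merge(exons,
                       merge_criteria=[mc.seqid, mc.overlap_end_inclusive,
                                       mc.strand, same_source]):
    # merged.children lists the component features (empty if nothing merged).
    print(merged, len(merged.children))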