Commits on Source (4)
Metadata-Version: 1.1
Metadata-Version: 2.1
Name: HTSeq
Version: 0.9.1
Version: 0.10.0
Summary: A framework to process and analyze data from high-throughput sequencing (HTS) assays
Home-page: https://github.com/simon-anders/htseq
Author: Fabio Zanini
Author-email: fabio.zanini@stanford.edu
Author: Simon Anders
Author-email: sanders@fs.tum.de
Maintainer: Fabio Zanini
Maintainer-email: fabio.zanini@stanford.edu
License: GPL3
Description:
A framework to process and analyze data from high-throughput sequencing
......@@ -21,3 +23,4 @@ Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
Classifier: Operating System :: POSIX
Classifier: Programming Language :: Python
Provides-Extra: htseq-qa
htseq (0.10.0-1) unstable; urgency=medium
* New upstream version
-- Andreas Tille <tille@debian.org> Mon, 28 May 2018 17:26:34 +0200
htseq (0.9.1-1) unstable; urgency=medium
* New upstream version
......
......@@ -3,7 +3,10 @@
See htseq.readthedocs.io/en/master/index.html for documentation.
"""
import itertools, warnings, os, shlex
import itertools
import warnings
import os
import shlex
try:
from _HTSeq import *
......@@ -44,7 +47,7 @@ class FileOrSequence( object ):
def __iter__(self):
self.line_no = 1
if isinstance( self.fos, str ):
if isinstance(self.fos, basestring):
if self.fos.lower().endswith((".gz", ".gzip")):
lines = gzip.open(self.fos, 'rt')
else:
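For reference, the logic introduced in this hunk — choosing gzip.open over plain open based on the file suffix, in text mode — can be sketched in isolation as follows (a hypothetical standalone helper, not part of HTSeq itself):

    import gzip

    def open_maybe_gzipped(filename):
        # Mirrors the suffix check added above: gzip-compressed files are
        # opened in text mode, everything else with plain open().
        if filename.lower().endswith((".gz", ".gzip")):
            return gzip.open(filename, "rt")
        return open(filename)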
......@@ -83,7 +86,6 @@ class FileOrSequence( object ):
#########################
class GenomicFeature(object):
"""A genomic feature, i.e., an interval on a genome with metadata.
At minimum, the following information should be provided by slots:
......@@ -144,6 +146,7 @@ class GenomicFeature( object ):
_re_attr_main = re.compile("\s*([^\s\=]+)[\s=]+(.*)")
_re_attr_empty = re.compile("^\s*$")
def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
"""Parses a GFF attribute string and returns it as a dictionary.
......@@ -158,10 +161,10 @@ def parse_GFF_attribute_string( attrStr, extra_return_first_value=False ):
if _re_attr_empty.match(attr):
continue
if attr.count('"') not in (0, 2):
raise ValueError, "The attribute string seems to contain mismatched quotes."
raise ValueError("The attribute string seems to contain mismatched quotes.")
mo = _re_attr_main.match(attr)
if not mo:
raise ValueError, "Failure parsing GFF attribute line"
raise ValueError("Failure parsing GFF attribute line")
val = mo.group(2)
if val.startswith('"') and val.endswith('"'):
val = val[1:-1]
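For reference, the function whose raise statements are modernized here returns a plain dictionary of attribute names to values; a usage sketch, assuming it is called through the HTSeq namespace and fed a GTF-style attribute string:

    import HTSeq

    attrs = HTSeq.parse_GFF_attribute_string(
        'gene_id "ENSG00000223972"; gene_name "DDX11L1";')
    print(attrs["gene_id"], attrs["gene_name"])
    # With extra_return_first_value=True the function also returns the value
    # of the first attribute, which GFF_Reader uses as the feature name.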
......@@ -174,10 +177,11 @@ def parse_GFF_attribute_string( attrStr, extra_return_first_value=False ):
else:
return d
_re_gff_meta_comment = re.compile("##\s*(\S+)\s+(\S*)")
class GFF_Reader( FileOrSequence ):
class GFF_Reader(FileOrSequence):
"""Parse a GFF file
Pass the constructor either a file name or an iterator of lines of a
......@@ -192,7 +196,6 @@ class GFF_Reader( FileOrSequence ):
self.end_included = end_included
self.metadata = {}
def __iter__(self):
for line in FileOrSequence.__iter__(self):
if line == "\n":
......@@ -256,7 +259,6 @@ def make_feature_dict( feature_sequence ):
return res
#########################
## GenomicArray
#########################
......@@ -272,6 +274,7 @@ def read_chrom_lens( filename, delimiter="\t" ):
_re_fasta_header_line = re.compile(r'>\s*(\S+)\s*(.*)')
class FastaReader(FileOrSequence):
"""A Fasta_Reader is associated with a FASTA file or an open connection
to a file-like object with content in FASTA format.
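A minimal FastaReader sketch, assuming a file "genome.fa"; each record is an HTSeq.Sequence with a name and a sequence:

    import HTSeq

    for seq in HTSeq.FastaReader("genome.fa"):
        print(seq.name, len(seq.seq))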
......@@ -365,6 +368,7 @@ class FastaReader( FileOrSequence ):
else:
return ans[0].get_reverse_complement()
class FastqReader(FileOrSequence):
"""A Fastq object is associated with a FASTQ self.file. When an iterator
is requested from the object, the FASTQ file is read.
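A minimal FastqReader sketch, assuming a file "reads.fastq" with standard Phred-scaled qualities; each record is a SequenceWithQualities:

    import HTSeq

    for read in HTSeq.FastqReader("reads.fastq", qual_scale="phred"):
        print(read.name, read.seq[:10], read.qual[:10])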
......@@ -376,7 +380,7 @@ class FastqReader( FileOrSequence ):
FileOrSequence.__init__(self, file_)
self.qual_scale = qual_scale
if qual_scale not in ("phred", "solexa", "solexa-old"):
raise ValueError, "Illegal quality scale."
raise ValueError("Illegal quality scale.")
self.raw_iterator = raw_iterator
def __iter__(self):
......@@ -388,7 +392,8 @@ class FastqReader( FileOrSequence ):
qual = fin.next()
if qual == "":
if id1 != "":
warnings.warn( "Number of lines in FASTQ file is not "
warnings.warn(
"Number of lines in FASTQ file is not "
"a multiple of 4. Discarding the last, "
"incomplete record")
break
......@@ -412,6 +417,7 @@ class FastqReader( FileOrSequence ):
self.qual_scale)
yield s
class BowtieReader(FileOrSequence):
"""A BowtieFile object is associated with a Bowtie output file that
contains short read alignments. It can generate an iterator of Alignment
......@@ -428,6 +434,7 @@ class BowtieReader( FileOrSequence ):
RuntimeWarning)
yield algnt
def bundle_multiple_alignments(sequence_of_alignments):
"""Some alignment programs, e.g., Bowtie, can output multiple alignments,
i.e., the same read is reported consecutively with different alignments.
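As the docstring notes, the alignments for one read must appear consecutively; a hedged usage sketch, assuming a suitably ordered SAM file "hits.sam":

    import HTSeq

    alignments = HTSeq.SAM_Reader("hits.sam")
    for bundle in HTSeq.bundle_multiple_alignments(alignments):
        # each bundle is a list of alignments reported for the same read
        print(bundle[0].read.name, len(bundle))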
......@@ -537,6 +544,7 @@ class SolexaExportReader( FileOrSequence ):
start + len(fields['read_seq']), strand)
yield record
class SAM_Reader(FileOrSequence):
"""A SAM_Reader object is associated with a SAM file that
contains short read alignments. It can generate an iterator of Alignment
......@@ -554,8 +562,8 @@ class SAM_Reader( FileOrSequence ):
raise
yield algnt
class GenomicArrayOfSets( GenomicArray ):
class GenomicArrayOfSets(GenomicArray):
"""A GenomicArrayOfSets is a specialization of GenomicArray that allows to store
sets of objects. On construction, the step vectors are initialized with empty sets.
By using the 'add_value' method, objects can be added to intervals. If an object
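The 'add_value' idiom described above is usually spelled with '+=' on an interval; a minimal sketch, with chromosome names and coordinates chosen purely for illustration:

    import HTSeq

    gas = HTSeq.GenomicArrayOfSets("auto", stranded=False)
    gas[HTSeq.GenomicInterval("chr1", 100, 300, ".")] += "gene_A"
    gas[HTSeq.GenomicInterval("chr1", 200, 400, ".")] += "gene_B"
    for iv, step_set in gas[HTSeq.GenomicInterval("chr1", 150, 350, ".")].steps():
        # the overlapping region reports {"gene_A", "gene_B"}
        print(iv, sorted(step_set))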
......@@ -871,7 +879,7 @@ class VCF_Reader( FileOrSequence ):
def meta_info(self, header_filename=None):
ret = []
if header_filename == None:
if header_filename is None:
the_iter = FileOrSequence.__iter__(self)
else:
the_iter = open(header_filename, "r")
......@@ -941,13 +949,15 @@ class WiggleReader( FileOrSequence ):
pos = int(tmp[0])
yield (GenomicInterval(chrom, pos, pos + span, '.'), float(tmp[1]))
class BAM_Reader(object):
def __init__( self, filename ):
def __init__(self, filename, check_sq=True):
global pysam
self.filename = filename
self.sf = None # This one is only used by __getitem__
self.record_no = -1
self.check_sq = check_sq
try:
import pysam
except ImportError:
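The new check_sq flag is stored here and later passed straight to pysam.Samfile; setting it to False allows opening BAM files that lack @SQ header lines, such as unaligned BAM output. A hedged usage sketch with an assumed file name:

    import HTSeq

    bam = HTSeq.BAM_Reader("unaligned.bam", check_sq=False)
    for aln in bam:
        print(aln.read.name, aln.aligned)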
......@@ -955,7 +965,7 @@ class BAM_Reader( object ):
raise
def __iter__(self):
sf = pysam.Samfile(self.filename, "rb")
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
self.record_no = 0
for pa in sf:
#yield SAM_Alignment.from_pysam_AlignedRead(pa, sf)
......@@ -963,7 +973,7 @@ class BAM_Reader( object ):
self.record_no += 1
def fetch(self, reference=None, start=None, end=None, region=None):
sf = pysam.Samfile(self.filename, "rb")
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
self.record_no = 0
try:
for pa in sf.fetch(reference, start, end, region):
......@@ -986,17 +996,17 @@ class BAM_Reader( object ):
def __getitem__(self, iv):
if not isinstance(iv, GenomicInterval):
raise TypeError, "Use a HTSeq.GenomicInterval to access regions within .bam-file!"
raise TypeError("Use a HTSeq.GenomicInterval to access regions within .bam-file!")
if self.sf is None:
self.sf = pysam.Samfile( self.filename, "rb" )
self.sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
# NOTE: pysam 0.9 renamed _hasIndex to has_index
if (hasattr(self.sf, '_hasIndex') and (not self.sf._hasIndex())) or (not self.sf.has_index()):
raise ValueError, "The .bam-file has no index, random-access is disabled!"
raise ValueError("The .bam-file has no index, random-access is disabled!")
for pa in self.sf.fetch(iv.chrom, iv.start+1, iv.end):
yield SAM_Alignment.from_pysam_AlignedRead(pa, self.sf)
def get_header_dict(self):
sf = pysam.Samfile(self.filename, "rb")
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
return sf.header
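Random access through __getitem__ still requires an index (a .bai file next to a coordinate-sorted BAM); a short sketch, with the file name and coordinates assumed for illustration:

    import HTSeq

    bam = HTSeq.BAM_Reader("sorted_indexed.bam")
    window = HTSeq.GenomicInterval("chr1", 100000, 101000, ".")
    for aln in bam[window]:
        print(aln.iv)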
......@@ -1014,7 +1024,14 @@ class BAM_Writer( object ):
self.referencelengths = referencelengths
self.text = text
self.header = header
self.sf = pysam.Samfile( self.filename, mode="wb", template = self.template, referencenames = self.referencenames, referencelengths = self.referencelengths, text = self.text, header = self.header )
self.sf = pysam.Samfile(
self.filename,
mode="wb",
template=self.template,
referencenames=self.referencenames,
referencelengths=self.referencelengths,
text=self.text,
header=self.header)
@classmethod
def from_BAM_Reader(cls, fn, br):
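The from_BAM_Reader classmethod shown here builds a writer whose header is copied from an existing reader; a hedged sketch with assumed file names and an arbitrary quality cutoff:

    import HTSeq

    bam_in = HTSeq.BAM_Reader("input.bam")
    bam_out = HTSeq.BAM_Writer.from_BAM_Reader("filtered.bam", bam_in)
    for aln in bam_in:
        if aln.aligned and aln.aQual >= 20:
            bam_out.write(aln)
    bam_out.close()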
......@@ -1039,13 +1056,12 @@ class BED_Reader( FileOrSequence ):
continue
fields = line.split()
if len(fields) < 3:
raise ValueError, "BED file line contains less than 3 fields"
raise ValueError("BED file line contains less than 3 fields")
if len(fields) > 9:
raise ValueError, "BED file line contains more than 9 fields"
raise ValueError("BED file line contains more than 9 fields")
iv = GenomicInterval(fields[0], int(fields[1]), int(fields[2]), fields[5] if len(fields) > 5 else ".")
f = GenomicFeature(fields[3] if len(fields) > 3 else "unnamed", "BED line", iv)
f.score = float(fields[4]) if len(fields) > 4 else None
f.thick = GenomicInterval(iv.chrom, int(fields[6]), int(fields[7]), iv.strand) if len(fields) > 7 else None
f.itemRgb = [int(a) for a in fields[8].split(",")] if len(fields) > 8 else None
yield(f)
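For reference, the BED_Reader patched here yields GenomicFeature objects whose optional fields (score, thick, itemRgb) default to None when the corresponding columns are absent; a minimal usage sketch with an assumed file name:

    import HTSeq

    for feature in HTSeq.BED_Reader("regions.bed"):
        print(feature.name, feature.iv, feature.score)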
__version__ = "0.9.1"
\ No newline at end of file
__version__ = "0.10.0"
\ No newline at end of file
......@@ -87,6 +87,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
i += 1
if i % 100000 == 0 and not quiet:
sys.stderr.write("%d GFF lines processed.\n" % i)
sys.stderr.flush()
except:
sys.stderr.write(
"Error occured when processing GFF file (%s):\n" %
......@@ -95,6 +96,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
if not quiet:
sys.stderr.write("%d GFF lines processed.\n" % i)
sys.stderr.flush()
if len(counts) == 0:
sys.stderr.write(
......@@ -156,6 +158,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
sys.stderr.write(
"%d SAM alignment record%s processed.\n" %
(i, "s" if not pe_mode else " pairs"))
sys.stderr.flush()
i += 1
if not pe_mode:
......@@ -298,6 +301,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
sys.stderr.write(
"%d SAM %s processed.\n" %
(i, "alignments " if not pe_mode else "alignment pairs"))
sys.stderr.flush()
if samoutfile is not None:
samoutfile.close()
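The sys.stderr.flush() calls added throughout count.py make the progress messages appear promptly when stderr is block-buffered, for example when htseq-count runs inside a pipeline or under a batch scheduler. The pattern in isolation, using a hypothetical helper name:

    import sys

    def report_progress(n, what):
        # Flushing after each message keeps progress visible even when
        # stderr is not attached to a terminal.
        sys.stderr.write("%d %s processed.\n" % (n, what))
        sys.stderr.flush()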
......
......@@ -576,7 +576,9 @@ cdef class GenomicArray(object):
self.storage, self.memmap_dir)
else:
self.chrom_vectors[chrom] = {
strand_nostrand: ChromVector.create(iv, self.typecode, self.storage)}
strand_nostrand: ChromVector.create(iv, self.typecode,
self.storage,
self.memmap_dir)}
def __reduce__(self):
return (_GenomicArray_unpickle, (self.stranded, self.typecode, self.chrom_vectors))
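This hunk fixes the unstranded code path so that memmap_dir is also forwarded to ChromVector.create; a sketch of the configuration the fix affects, with the chromosome length and directory chosen for illustration (the directory must already exist):

    import HTSeq

    ga = HTSeq.GenomicArray({"chr1": 1000000}, stranded=False,
                            typecode="i", storage="memmap",
                            memmap_dir="/tmp/htseq_vectors")
    ga[HTSeq.GenomicInterval("chr1", 100, 200, ".")] = 5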
......
......@@ -993,11 +993,12 @@ class WiggleReader(FileOrSequence):
class BAM_Reader(object):
def __init__(self, filename):
def __init__(self, filename, check_sq=True):
global pysam
self.filename = filename
self.sf = None # This one is only used by __getitem__
self.record_no = -1
self.check_sq = check_sq
try:
import pysam
except ImportError:
......@@ -1006,7 +1007,7 @@ class BAM_Reader(object):
raise
def __iter__(self):
sf = pysam.Samfile(self.filename, "rb")
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
self.record_no = 0
for pa in sf:
# yield SAM_Alignment.from_pysam_AlignedRead( pa, sf )
......@@ -1014,7 +1015,7 @@ class BAM_Reader(object):
self.record_no += 1
def fetch(self, reference=None, start=None, end=None, region=None):
sf = pysam.Samfile(self.filename, "rb")
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
self.record_no = 0
try:
for pa in sf.fetch(reference, start, end, region):
......@@ -1041,7 +1042,7 @@ class BAM_Reader(object):
raise TypeError(
"Use a HTSeq.GenomicInterval to access regions within .bam-file!")
if self.sf is None:
self.sf = pysam.Samfile(self.filename, "rb")
self.sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
# NOTE: pysam 0.9 renamed _hasIndex to has_index
if (hasattr(self.sf, '_hasIndex') and (not self.sf._hasIndex())) or (not self.sf.has_index()):
raise ValueError(
......@@ -1050,7 +1051,7 @@ class BAM_Reader(object):
yield SAM_Alignment.from_pysam_AlignedRead(pa, self.sf)
def get_header_dict(self):
sf = pysam.Samfile(self.filename, "rb")
sf = pysam.Samfile(self.filename, "rb", check_sq=self.check_sq)
return sf.header
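fetch() forwards its arguments to pysam and, like __getitem__, requires an index; a short sketch with an assumed file name and coordinates:

    import HTSeq

    bam = HTSeq.BAM_Reader("sorted_indexed.bam")
    for aln in bam.fetch(reference="chr1", start=10000, end=20000):
        print(aln.read.name, aln.iv)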
......
__version__ = "0.9.1"
\ No newline at end of file
__version__ = "0.10.0"
\ No newline at end of file
......@@ -87,6 +87,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
i += 1
if i % 100000 == 0 and not quiet:
sys.stderr.write("%d GFF lines processed.\n" % i)
sys.stderr.flush()
except:
sys.stderr.write(
"Error occured when processing GFF file (%s):\n" %
......@@ -95,6 +96,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
if not quiet:
sys.stderr.write("%d GFF lines processed.\n" % i)
sys.stderr.flush()
if len(counts) == 0:
sys.stderr.write(
......@@ -156,6 +158,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
sys.stderr.write(
"%d SAM alignment record%s processed.\n" %
(i, "s" if not pe_mode else " pairs"))
sys.stderr.flush()
i += 1
if not pe_mode:
......@@ -298,6 +301,7 @@ def count_reads_in_features(sam_filenames, gff_filename,
sys.stderr.write(
"%d SAM %s processed.\n" %
(i, "alignments " if not pe_mode else "alignment pairs"))
sys.stderr.flush()
if samoutfile is not None:
samoutfile.close()
......
......@@ -579,7 +579,9 @@ cdef class GenomicArray(object):
self.storage, self.memmap_dir)
else:
self.chrom_vectors[chrom] = {
strand_nostrand: ChromVector.create(iv, self.typecode, self.storage)}
strand_nostrand: ChromVector.create(iv, self.typecode,
self.storage,
self.memmap_dir)}
def __reduce__(self):
return (_GenomicArray_unpickle, (self.stranded, self.typecode, self.chrom_vectors))
......@@ -1513,11 +1515,18 @@ cdef class SAM_Alignment(AlignmentWithSequenceReversal):
else:
cigar = "*"
return '\t'.join((self.read.name, str(self.flag), query_start.chrom,
str(query_start.start +
1), str(self.aQual), cigar, mate_start.chrom,
str(mate_start.pos + 1), str(self.inferred_insert_size),
self.read_as_aligned.seq, self.read_as_aligned.qualstr,
return '\t'.join(
(self.read.name,
str(self.flag),
query_start.chrom,
str(query_start.start + 1),
str(self.aQual),
cigar,
mate_start.chrom,
str(mate_start.pos + 1),
str(self.inferred_insert_size),
self.read_as_aligned.seq.decode(),
self.read_as_aligned.qualstr.decode(),
'\t'.join(self.raw_optional_fields())))
def optional_field(SAM_Alignment self, str tag):
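With seq and qualstr now decoded, get_sam_line() returns a plain str that can be written directly to a text-mode SAM file; a hedged sketch with assumed file names:

    import HTSeq

    with open("subset.sam", "w") as out:
        for aln in HTSeq.BAM_Reader("example.bam"):
            out.write(aln.get_sam_line() + "\n")
            break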
......
[egg_info]
tag_build =
tag_date = 0
tag_svn_revision = 0