Skip to content
Commits on Source (7)
......@@ -3,7 +3,6 @@ os:
- osx
language: c
sudo: false
env:
matrix:
......
......@@ -50,8 +50,8 @@ include htslib/htslib.pc.in
include htslib/htslib/*.h
include htslib/cram/*.c
include htslib/cram/*.h
include htslib/win/*.c
include htslib/win/*.h
include htslib/os/*.c
include htslib/os/*.h
include cy_build.py
include pysam.py
include requirements.txt
......
......@@ -5,6 +5,17 @@ http://pysam.readthedocs.io/en/latest/release.html
Release notes
=============
Release 0.15.2
==============
Bugfix release.
* [#746] catch pileup iterator out-of-scope segfaults
* [#747] fix faidx fetch with region
* [#748] increase max_pos to (1<<31)-1
* [#645] Add missing macOS stub files in `MANIFEST.in`, @SoapZA
* [#737] Fix bug in get_aligned_pairs, @bkohrn
Release 0.15.1
==============
......
python-pysam (0.15.2+ds-1) UNRELEASED; urgency=medium
* Team upload.
* New upstream version
* Standards-Version: 4.3.0, no changes needed
* added Py2 and Py3 versions of ${python:Provides}
* Fix lintian found spelling typos.
* debian/tests/control.autodep8 → debian/tests/control.
* remove errant log.txt from the packages.
-- Michael R. Crusoe <michael.crusoe@gmail.com> Wed, 16 Jan 2019 01:47:40 -0800
python-pysam (0.15.1+ds-1) unstable; urgency=medium
* Team upload.
......
......@@ -22,7 +22,7 @@ Build-Depends: debhelper (>= 11~),
bcftools (>= 1.9) <!nocheck>,
python-pytest <!nocheck>,
python3-pytest <!nocheck>
Standards-Version: 4.2.1
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/python-pysam
Vcs-Git: https://salsa.debian.org/med-team/python-pysam.git
Homepage: http://pysam.readthedocs.org/en/latest
......@@ -32,6 +32,7 @@ Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
${python:Depends}
Provides: ${python:Provides}
Description: interface for the SAM/BAM sequence alignment and mapping format (Python 2)
Pysam is a Python module for reading and manipulating Samfiles. It's a
lightweight wrapper of the samtools C-API. Pysam also includes an interface
......@@ -44,6 +45,7 @@ Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
${python3:Depends}
Provides: ${python3:Provides}
Description: interface for the SAM/BAM sequence alignment and mapping format (Python 3)
Pysam is a Python module for reading and manipulating Samfiles. It's a
lightweight wrapper of the samtools C-API. Pysam also includes an interface
......
skip_test_remote.patch
skip_test_needing_missing_data.patch
spelling
From: Michael R. Crusoe <michael.crusoe@gmail.com>
Subject: Fix spelling typos, courtesy of lintian
--- python-pysam.orig/bcftools/filter.c
+++ python-pysam/bcftools/filter.c
@@ -993,7 +993,7 @@
}
static int func_npass(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
{
- if ( nstack==0 ) error("Error parsing the expresion\n");
+ if ( nstack==0 ) error("Error parsing the expression\n");
token_t *tok = stack[nstack - 1];
if ( !tok->nsamples ) error("The function %s works with FORMAT fields\n", rtok->tag);
--- python-pysam.orig/bcftools/filter.c.pysam.c
+++ python-pysam/bcftools/filter.c.pysam.c
@@ -995,7 +995,7 @@
}
static int func_npass(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
{
- if ( nstack==0 ) error("Error parsing the expresion\n");
+ if ( nstack==0 ) error("Error parsing the expression\n");
token_t *tok = stack[nstack - 1];
if ( !tok->nsamples ) error("The function %s works with FORMAT fields\n", rtok->tag);
--- python-pysam.orig/pysam/libcalignedsegment.pyx
+++ python-pysam/pysam/libcalignedsegment.pyx
@@ -2244,7 +2244,7 @@
*value*.
An existing value of the same *tag* will be overwritten unless
- *replace* is set to False. This is usually not recommened as a
+ *replace* is set to False. This is usually not recommended as a
tag may only appear once in the optional alignment section.
If *value* is None, the tag will be deleted.
--- python-pysam.orig/pysam/libcalignmentfile.pyx
+++ python-pysam/pysam/libcalignmentfile.pyx
@@ -1023,7 +1023,7 @@
See :meth:`~pysam.HTSFile.parse_region` for more information
on how genomic regions can be specified. :term:`reference` and
- `end` are also accepted for backward compatiblity as synonyms
+ `end` are also accepted for backward compatibility as synonyms
for :term:`contig` and `stop`, respectively.
Without a `contig` or `region` all mapped reads in the file
@@ -1206,7 +1206,7 @@
"""perform a :term:`pileup` within a :term:`region`. The region is
specified by :term:`contig`, `start` and `stop` (using
0-based indexing). :term:`reference` and `end` are also accepted for
- backward compatiblity as synonyms for :term:`contig` and `stop`,
+ backward compatibility as synonyms for :term:`contig` and `stop`,
respectively. Alternatively, a samtools 'region' string
can be supplied.
@@ -1355,7 +1355,7 @@
The region is specified by :term:`contig`, `start` and `stop`.
:term:`reference` and `end` are also accepted for backward
- compatiblity as synonyms for :term:`contig` and `stop`,
+ compatibility as synonyms for :term:`contig` and `stop`,
respectively. Alternatively, a :term:`samtools` :term:`region`
string can be supplied.
@@ -1459,7 +1459,7 @@
The region is specified by :term:`contig`, `start` and `stop`.
:term:`reference` and `end` are also accepted for backward
- compatiblity as synonyms for :term:`contig` and `stop`,
+ compatibility as synonyms for :term:`contig` and `stop`,
respectively. Alternatively, a :term:`samtools` :term:`region`
string can be supplied. The coverage is computed per-base [ACGT].
--- python-pysam.orig/pysam/libchtslib.pxd
+++ python-pysam/pysam/libchtslib.pxd
@@ -2502,7 +2502,7 @@
# 2 if the file is a stream and thus unseekable
# 1 if the file contains an EOF block
# 0 if the file does not contain an EOF block
- # -1 if an error occured whilst reading the file or we could not seek back to where we were
+ # -1 if an error occurred whilst reading the file or we could not seek back to where we were
#
#
int cram_check_EOF(cram_fd *fd)
--- python-pysam.orig/pysam/libchtslib.pyx
+++ python-pysam/pysam/libchtslib.pyx
@@ -587,7 +587,7 @@
rval = hts_opt_apply(self.htsfile, opts)
if rval != 0:
hts_opt_free(opts)
- raise RuntimeError('An error occured while applying the requested format options')
+ raise RuntimeError('An error occurred while applying the requested format options')
hts_opt_free(opts)
def parse_region(self, contig=None, start=None, stop=None,
@@ -597,7 +597,7 @@
either be specified by :term:`contig`, `start` and
`stop`. `start` and `stop` denote 0-based, half-open
intervals. :term:`reference` and `end` are also accepted for
- backward compatiblity as synonyms for :term:`contig` and
+ backward compatibility as synonyms for :term:`contig` and
`stop`, respectively.
Alternatively, a samtools :term:`region` string can be
--- python-pysam.orig/pysam/libcutils.pyx
+++ python-pysam/pysam/libcutils.pyx
@@ -179,7 +179,7 @@
`end`. `start` and `end` denote 0-based, half-open intervals.
:term:`reference` and `end` are also accepted for backward
- compatiblity as synonyms for :term:`contig` and `stop`,
+ compatibility as synonyms for :term:`contig` and `stop`,
respectively.
Alternatively, a samtools :term:`region` string can be supplied.
......@@ -25,6 +25,7 @@ clean: clean-tests
override_dh_install: clean-tests
dh_install -Xtest.gtf.gz
find debian -name log.txt -delete
ifeq (,$(findstring nocheck, $(DEB_BUILD_OPTIONS)))
override_dh_auto_test: pysam_data.all cbcf_data.all
......
......@@ -7,6 +7,17 @@ Release 0.15.1
Bugfix release.
* [#746] catch pileup iterator out-of-scope segfaults
* [#747] fix faidx fetch with region
* [#748] increase max_pos to (1<<31)-1
* [#645] Add missing macOS stub files in `MANIFEST.in`, @SoapZA
* [#737] Fix bug in get_aligned_pairs, @bkohrn
Release 0.15.1
==============
Bugfix release.
* [#716] raise ValueError if tid is out of range when writing
* [#697] release version using cython 0.28.5 for python 3.7
compatibility
......
......@@ -402,6 +402,13 @@ cdef inline pack_tags(tags):
# use array.tostring() to retrieve byte representation and
# save as bytes
datafmt = "2sBBI%is" % (len(value) * DATATYPE2FORMAT[typecode][1])
if IS_PYTHON3:
args.extend([pytag[:2],
ord("B"),
typecode,
len(value),
value.tobytes()])
else:
args.extend([pytag[:2],
ord("B"),
typecode,
......@@ -1945,6 +1952,8 @@ cdef class AlignedSegment:
else:
for i from pos <= i < pos + l:
result.append((None, i))
else:
r_idx += l
pos += l
elif op == BAM_CHARD_CLIP:
......@@ -2845,6 +2854,10 @@ cdef class PileupColumn:
# out of sync.
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
pileups.append(makePileupRead(p, self.header))
......@@ -2887,8 +2900,15 @@ cdef class PileupColumn:
cdef uint32_t c = 0
cdef uint32_t cnt = 0
cdef bam_pileup1_t * p = NULL
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
cnt += 1
......@@ -2957,10 +2977,17 @@ cdef class PileupColumn:
cdef uint8_t * buf = self.buf
cdef bam_pileup1_t * p = NULL
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
# todo: reference sequence to count matches/mismatches
# todo: convert assertions to exceptions
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
# see samtools pileup_seq
......@@ -3062,6 +3089,11 @@ cdef class PileupColumn:
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if p.qpos < p.b.core.l_qseq:
c = bam_get_qual(p.b)[p.qpos]
else:
......@@ -3079,11 +3111,19 @@ cdef class PileupColumn:
list: a list of quality scores
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.b.core.qual)
......@@ -3097,12 +3137,19 @@ cdef class PileupColumn:
list: a list of read positions
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(p.qpos)
......@@ -3116,11 +3163,19 @@ cdef class PileupColumn:
list: a list of query names at pileup column position.
"""
if self.plp == NULL or self.plp[0] == NULL:
raise ValueError("PileupColumn accessed after iterator finished")
cdef uint32_t x = 0
cdef bam_pileup1_t * p = NULL
result = []
for x from 0 <= x < self.n_pu:
p = &(self.plp[0][x])
if p == NULL:
raise ValueError(
"pileup buffer out of sync - most likely use of iterator "
"outside loop")
if pileup_base_qual_skip(p, self.min_base_quality):
continue
result.append(charptr_to_str(pysam_bam_get_qname(p.b)))
......
# cython: embedsignature=True
# cython: profile=True
########################################################
......@@ -62,6 +61,8 @@ import warnings
import array
from libc.errno cimport errno, EPIPE
from libc.string cimport strcmp, strpbrk, strerror
from libc.stdint cimport INT32_MAX
from cpython cimport array as c_array
from cpython.version cimport PY_MAJOR_VERSION
......@@ -94,7 +95,8 @@ IndexStats = collections.namedtuple("IndexStats",
########################################################
## global variables
# maximum genomic coordinate
cdef int MAX_POS = 2 << 29
# for some reason, using 'int' causes overflow
cdef int MAX_POS = (1 << 31) - 1
# valid types for SAM headers
VALID_HEADER_TYPES = {"HD" : collections.Mapping,
......@@ -1314,7 +1316,8 @@ cdef class AlignmentFile(HTSFile):
an iterator over genomic positions.
"""
cdef int rtid, rstart, rstop, has_coord
cdef int rtid, has_coord
cdef int32_t rstart, rstop
if not self.is_open:
raise ValueError("I/O operation on closed file")
......@@ -2054,7 +2057,6 @@ cdef class IteratorRowRegion(IteratorRow):
IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)
with nogil:
self.iter = sam_itr_queryi(
self.index,
......
......@@ -111,7 +111,7 @@ __all__ = ['VariantFile',
## Constants
########################################################################
cdef int MAX_POS = 2 << 29
cdef int MAX_POS = (1 << 31) - 1
cdef tuple VALUE_TYPES = ('Flag', 'Integer', 'Float', 'String')
cdef tuple METADATA_TYPES = ('FILTER', 'INFO', 'FORMAT', 'CONTIG', 'STRUCTURED', 'GENERIC')
cdef tuple METADATA_LENGTHS = ('FIXED', 'VARIABLE', 'A', 'G', 'R')
......
......@@ -287,19 +287,20 @@ cdef class FastaFile:
cdef char *ref
cdef int rstart, rend
reference, rstart, rend = parse_region(reference, start, end, region)
contig, rstart, rend = parse_region(reference, start, end, region)
if reference is None:
if contig is None:
raise ValueError("no sequence/region supplied.")
if rstart == rend:
return ""
ref = reference
contig_b = force_bytes(contig)
ref = contig_b
with nogil:
length = faidx_seq_len(self.fastafile, ref)
if length == -1:
raise KeyError("sequence '%s' not present" % reference)
raise KeyError("sequence '%s' not present" % contig)
if rstart >= length:
return ""
......@@ -315,7 +316,7 @@ cdef class FastaFile:
if errno:
raise IOError(errno, strerror(errno))
else:
raise ValueError("failure when retrieving sequence on '%s'" % reference)
raise ValueError("failure when retrieving sequence on '%s'" % contig)
try:
return charptr_to_str(seq)
......
......@@ -9,10 +9,9 @@
from posix.unistd cimport dup
from libc.errno cimport errno
from libc.stdint cimport INT32_MAX
from cpython cimport PyBytes_FromStringAndSize
from pysam.libchtslib cimport *
from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
from pysam.libcutils cimport encode_filename, from_string_and_size
......@@ -41,7 +40,7 @@ DEF SEEK_CUR = 1
DEF SEEK_END = 2
# maximum genomic coordinate
cdef int MAX_POS = 2 << 29
cdef int MAX_POS = (1 << 31) - 1
cdef tuple FORMAT_CATEGORIES = ('UNKNOWN', 'ALIGNMENTS', 'VARIANTS', 'INDEX', 'REGIONS')
cdef tuple FORMATS = ('UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT', 'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
......@@ -630,8 +629,8 @@ cdef class HTSFile(object):
"""
cdef int rtid
cdef long long rstart
cdef long long rstop
cdef int32_t rstart
cdef int32_t rstop
if reference is not None:
if contig is not None:
......@@ -644,7 +643,7 @@ cdef class HTSFile(object):
stop = end
if contig is None and tid is None and region is None:
return 0, 0, 0, 0
return 0, 0, 0, MAX_POS
rtid = -1
rstart = 0
......
......@@ -12,6 +12,7 @@ from cpython cimport PyBytes_Check, PyUnicode_Check
from cpython cimport array as c_array
from libc.stdlib cimport calloc, free
from libc.string cimport strncpy
from libc.stdint cimport INT32_MAX, int32_t
from libc.stdio cimport fprintf, stderr, fflush
from libc.stdio cimport stdout as c_stdout
from posix.fcntl cimport open as c_open, O_WRONLY
......@@ -24,7 +25,7 @@ from libcbcftools cimport bcftools_main, bcftools_set_stdout, bcftools_set_stder
#####################################################################
# hard-coded constants
cdef int MAX_POS = 2 << 29
cdef int MAX_POS = (1 << 31) - 1
#################################################################
# Utility functions for quality string conversions
......@@ -48,7 +49,10 @@ cpdef array_to_qualitystring(c_array.array qualities, int offset=33):
for x from 0 <= x < len(qualities):
result[x] = qualities[x] + offset
return force_str(result.tostring())
if IS_PYTHON3:
return force_str(result.tobytes())
else:
return result.tostring()
cpdef qualities_to_qualitystring(qualities, int offset=33):
......@@ -198,51 +202,58 @@ cpdef parse_region(contig=None,
for invalid or out of bounds regions.
"""
cdef long long rstart
cdef long long rend
cdef int32_t rstart
cdef int32_t rstop
if reference is not None:
if contig is not None:
reference = contig
raise ValueError('contig and reference should not both be specified')
contig = reference
if contig is not None and region is not None:
raise ValueError('contig/reference and region should not both be specified')
if end is not None:
if stop is not None:
end = stop
raise ValueError('stop and end should not both be specified')
stop = end
if contig is None and region is None:
raise ValueError("neither contig nor region are given")
rstart = 0
rend = MAX_POS
if start != None:
rstop = MAX_POS
if start is not None:
try:
rstart = start
except OverflowError:
raise ValueError('start out of range (%i)' % start)
if end != None:
if stop is not None:
try:
rend = end
rstop = stop
except OverflowError:
raise ValueError('end out of range (%i)' % end)
raise ValueError('stop out of range (%i)' % stop)
if region:
region = force_str(region)
if ":" in region:
contig, coord = region.split(":")
parts = coord.split("-")
rstart = int(parts[0]) - 1
if len(parts) >= 1:
rend = int(parts[1])
rstop = int(parts[1])
else:
contig = region
if not reference:
return None, 0, 0
if rstart > rstop:
raise ValueError('invalid coordinates: start (%i) > stop (%i)' % (rstart, rstop))
if not 0 <= rstart < MAX_POS:
raise ValueError('start out of range (%i)' % rstart)
if not 0 <= rend <= MAX_POS:
raise ValueError('end out of range (%i)' % rend)
if rstart > rend:
raise ValueError(
'invalid region: start (%i) > end (%i)' % (rstart, rend))
if not 0 <= rstop <= MAX_POS:
raise ValueError('stop out of range (%i)' % rstop)
return force_bytes(reference), rstart, rend
return contig, rstart, rstop
def _pysam_dispatch(collection,
......
# pysam versioning information
__version__ = "0.15.0"
__version__ = "0.15.2"
# TODO: upgrade number
__samtools_version__ = "1.9"
......
......@@ -430,6 +430,35 @@ class TestAlignedSegment(ReadTest):
(6, 27), (7, 28),
(8, 29), (9, 30)])
def test_equivalence_matches_only_and_with_seq(self):
a = self.build_read()
a.query_sequence = "ACGT" * 2
a.cigarstring = "4M1D4M"
a.set_tag("MD", "4^x4")
full = (list(zip(range(0, 4), range(20, 24), "ACGT")) +
[(None, 24, "x")] +
list(zip(range(4, 8), range(25, 29), "ACGT")))
self.assertEqual(
a.get_aligned_pairs(matches_only=False, with_seq=True), full)
self.assertEqual(
a.get_aligned_pairs(matches_only=True, with_seq=True),
[x for x in full if x[0] is not None and x[1] is not None])
a = self.build_read()
a.query_sequence = "ACGT" * 2
a.cigarstring = "4M1N4M"
a.set_tag("MD", "8")
full = (list(zip(range(0, 4), range(20, 24), "ACGT")) +
[(None, 24, None)] +
list(zip(range(4, 8), range(25, 29), "ACGT")))
self.assertEqual(
a.get_aligned_pairs(matches_only=False, with_seq=True), full)
self.assertEqual(
a.get_aligned_pairs(matches_only=True, with_seq=True),
[x for x in full if x[0] is not None and x[1] is not None])
def test_get_aligned_pairs_lowercase_md(self):
a = self.build_read()
a.query_sequence = "A" * 10
......
......@@ -121,6 +121,7 @@ class TestHeaderConstruction(unittest.TestCase):
self.compare_headers(header, self.header_without_text)
self.check_name_mapping(header)
class TestHeaderSAM(unittest.TestCase):
"""testing header manipulation"""
......@@ -287,6 +288,7 @@ class TestHeaderWriteRead(unittest.TestCase):
def check_read_write(self, flag_write, header):
fn = get_temp_filename()
print(fn)
with pysam.AlignmentFile(
fn,
flag_write,
......@@ -299,8 +301,12 @@ class TestHeaderWriteRead(unittest.TestCase):
with pysam.AlignmentFile(fn) as inf:
read_header = inf.header
os.unlink(fn)
# os.unlink(fn)
self.compare_headers(header, read_header)
expected_lengths = dict([(x["SN"], x["LN"]) for x in header["SQ"]])
self.assertEqual(expected_lengths,
dict(zip(read_header.references,
read_header.lengths)))
def test_SAM(self):
self.check_read_write("wh", self.header)
......@@ -310,6 +316,15 @@ class TestHeaderWriteRead(unittest.TestCase):
def test_CRAM(self):
header = copy.copy(self.header)
if "PG" in header:
# for CRAM, \t needs to be quoted:
header['PG'][1]['CL'] = re.sub(r"\t", r"\\\\t", header['PG'][1]['CL'])
self.check_read_write("wc", header)
class TestHeaderLargeContigs(TestHeaderWriteRead):
"""see issue 741"""
header = {'SQ': [{'LN': 2147483647, 'SN': 'chr1'},
{'LN': 1584, 'SN': 'chr2'}],
'HD': {'VN': '1.0'}}
......@@ -206,10 +206,19 @@ class TestPileupObjects(unittest.TestCase):
'''test if exception is raised if pileup col is accessed after
iterator is exhausted.'''
max_n = 0
for pileupcol in self.samfile.pileup():
pass
self.assertRaises(ValueError, getattr, pileupcol, "pileups")
if max_n < pileupcol.n:
max_col = pileupcol
max_n = pileupcol.n
self.assertRaises(ValueError, getattr, max_col, "pileups")
self.assertRaises(ValueError, max_col.get_query_sequences)
self.assertRaises(ValueError, max_col.get_num_aligned)
self.assertRaises(ValueError, max_col.get_query_qualities)
self.assertRaises(ValueError, max_col.get_mapping_qualities)
self.assertRaises(ValueError, max_col.get_query_positions)
self.assertRaises(ValueError, max_col.get_query_names)
class TestIteratorColumnBAM(unittest.TestCase):
......