Skip to content
Commits on Source (4)
......@@ -11,8 +11,8 @@ python:
- 'pypy'
- 'pypy3'
install:
- pip wheel -f wheelhouse coverage biopython cython pysam pyvcf || true
- pip install -f wheelhouse biopython cython pysam pyfasta coverage pyvcf || true
- pip wheel -f wheelhouse coverage biopython cython pysam pyvcf numpy || true
- pip install -f wheelhouse biopython cython pysam pyfasta coverage pyvcf numpy || true
- python setup.py install
- if [ ! -f samtools-1.2 ]; then curl -sL https://github.com/samtools/samtools/releases/download/1.2/samtools-1.2.tar.bz2 | tar -xjv; fi
- cd samtools-1.2
......
......@@ -261,6 +261,17 @@ Sequence names are truncated on any whitespace. This is a limitation of the inde
gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced
gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds
Records can be accessed efficiently as numpy arrays:
.. code:: python
# new in v0.5.4
>>> from pyfaidx import Fasta
>>> import numpy as np
>>> genes = Fasta('tests/data/genes.fasta')
>>> np.asarray(genes['NM_001282543.1'])
array(['C', 'C', 'C', ..., 'A', 'A', 'A'], dtype='|S1')
Sequences can be buffered in memory using a read-ahead buffer
for fast sequential access:
......
python-pyfaidx (0.5.4-1) unstable; urgency=medium
* New upstream version
-- Andreas Tille <tille@debian.org> Tue, 29 May 2018 08:57:08 +0200
python-pyfaidx (0.5.3.1-1) unstable; urgency=medium
* New upstream version
......
......@@ -5,6 +5,7 @@ Fasta file -> Faidx -> Fasta -> FastaRecord -> Sequence
from __future__ import division
import os
import sys
from os.path import getmtime
from six import PY2, PY3, string_types, integer_types
from six.moves import zip_longest
......@@ -19,9 +20,12 @@ import warnings
from math import ceil
from threading import Lock
# Python 3 has no built-in ``buffer``; alias the name to ``memoryview`` so
# code that calls ``buffer(...)`` works on both major versions.
if sys.version_info > (3, ):
buffer = memoryview
dna_bases = re.compile(r'([ACTGNactgnYRWSKMDVHBXyrwskmdvhbx]+)')
__version__ = '0.5.3.1'
__version__ = '0.5.4'
class KeyFunctionError(ValueError):
......@@ -762,6 +766,7 @@ class Faidx(object):
class FastaRecord(object):
__slots__ = ['name', '_fa']
def __init__(self, name, fa):
# Keep the record's name and the object it was looked up from. ``fa`` is
# later reached through ``self._fa.faidx`` -- presumably the parent Fasta
# instance (TODO confirm against callers).
self.name = name
self._fa = fa
......@@ -887,6 +892,16 @@ class FastaRecord(object):
""" Read the actual defline from self._fa.faidx mdshw5/pyfaidx#54 """
return self._fa.faidx.get_long_name(self.name)
@property
def __array_interface__(self):
    """ Expose this record through the numpy array interface (issue #139).

    Returns a version-3 interface dict describing a one-dimensional array
    of single-byte strings (``|S1``). The shape is ``(len(self), )`` and
    the data is ``str(self)`` encoded as ASCII and wrapped in ``buffer``
    (aliased to ``memoryview`` on Python 3 at module level).
    """
    n_items = len(self)
    ascii_bytes = str(self).encode('ascii')
    interface = dict(shape=(n_items, ), typestr='|S1', version=3)
    interface['data'] = buffer(ascii_bytes)
    return interface
class MutableFastaRecord(FastaRecord):
def __init__(self, name, fa):
......
......@@ -9,6 +9,7 @@ from difflib import Differ
path = os.path.dirname(__file__)
os.chdir(path)
class TestFastaRecord(TestCase):
def setUp(self):
# No shared fixture is prepared here; the body is a no-op.
pass
......@@ -28,8 +29,9 @@ class TestFastaRecord(TestCase):
reference_upper = Fasta(filename, sequence_always_upper=True)
reference_normal = Fasta(filename)
os.remove('data/genes.fasta.lower.fai')
assert reference_upper['gi|557361099|gb|KF435150.1|'][1:100].seq == reference_normal['gi|557361099|gb|KF435150.1|'][1:100].seq.upper()
assert reference_upper['gi|557361099|gb|KF435150.1|'][
1:100].seq == reference_normal['gi|557361099|gb|KF435150.1|'][
1:100].seq.upper()
def test_long_names(self):
""" Test that deflines extracted using FastaRecord.long_name are
......@@ -60,7 +62,8 @@ class TestFastaRecord(TestCase):
line_len = len(line)
fasta_uniform_len.write(line)
elif line_len > len(line):
fasta_uniform_len.write(line.rstrip() + b'N' * (line_len - len(line)) + b'\n')
fasta_uniform_len.write(line.rstrip() + b'N' *
(line_len - len(line)) + b'\n')
else:
fasta_uniform_len.write(line)
fasta = Fasta('data/issue_62.fa', as_raw=True)
......@@ -98,6 +101,14 @@ class TestFastaRecord(TestCase):
os.remove('data/padded.fasta')
os.remove('data/padded.fasta.fai')
def test_numpy_array(self):
    """ Check that numpy.asarray on the first record of the lowercase
    genes file yields a numpy ndarray (exercises __array_interface__). """
    import numpy as np
    fasta = Fasta("data/genes.fasta.lower")
    first_record = fasta[0]
    converted = np.asarray(first_record)
    assert isinstance(converted, np.ndarray)
class TestMutableFastaRecord(TestCase):
def setUp(self):
......@@ -124,7 +135,8 @@ class TestMutableFastaRecord(TestCase):
fasta = Fasta('data/genes.fasta', mutable=False)
chunk = fasta['gi|557361099|gb|KF435150.1|'][0:100]
mutable['gi|557361099|gb|KF435150.1|'][0:100] = chunk.seq
assert str(fasta['gi|557361099|gb|KF435150.1|']) == str(mutable['gi|557361099|gb|KF435150.1|'])
assert str(fasta['gi|557361099|gb|KF435150.1|']) == str(
mutable['gi|557361099|gb|KF435150.1|'])
def test_mutate_fasta_to_N(self):
mutable = Fasta('data/genes_mutable.fasta', mutable=True)
......