Skip to content
Commits on Source (7)
fastaq (3.17.0-1) unstable; urgency=medium
* New upstream release.
* Use secure format link in d/copyright.
* Bump Standards-Version.
* Drop unneeded patches.
* Use debhelper 11.
-- Sascha Steinbiss <satta@debian.org> Thu, 22 Feb 2018 10:22:55 +0100
fastaq (3.16.0-1) unstable; urgency=medium
[ Steffen Moeller ]
......
......@@ -5,7 +5,7 @@ Uploaders: Andreas Tille <tille@debian.org>,
Sascha Steinbiss <satta@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 9),
Build-Depends: debhelper (>= 11),
dh-python,
python3,
python3-setuptools,
......@@ -14,7 +14,7 @@ Build-Depends: debhelper (>= 9),
samtools,
help2man
X-Python3-Version: >= 3.2
Standards-Version: 4.1.1
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/fastaq.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/fastaq.git
Homepage: https://github.com/sanger-pathogens/Fastaq
......
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: Fastaq
Source: https://github.com/sanger-pathogens/Fastaq
......
#delay-import-statements-for-manpage-creation.patch
Description: spelling
Author: Sascha Steinbiss <satta@debian.org>
--- a/pyfastaq/runners/make_random_contigs.py
+++ b/pyfastaq/runners/make_random_contigs.py
@@ -9,7 +9,7 @@
parser.add_argument('--name_by_letters', action='store_true', help='Name the contigs A,B,C,... will start at A again if you get to Z')
parser.add_argument('--prefix', help='Prefix to add to start of every sequence name', default='')
parser.add_argument('--seed', type=int, help='Seed for random number generator. Default is to use python\'s default', default=None)
- parser.add_argument('contigs', type=int, help='Nunber of contigs to make')
+ parser.add_argument('contigs', type=int, help='Number of contigs to make')
parser.add_argument('length', type=int, help='Length of each contig')
parser.add_argument('outfile', help='Name of output file')
options = parser.parse_args()
import copy
import re
import string
import random
import itertools
from collections import Counter
from pyfastaq import utils, intervals, genetic_codes
class Error(Exception):
    """Exception type defined by this module."""
......@@ -465,6 +464,47 @@ class Fasta:
'''Returns a Fasta sequence, translated into amino acids. Starts translating from 'frame', where frame expected to be 0,1 or 2'''
return Fasta(self.id, ''.join([genetic_codes.codes[genetic_code].get(self.seq[x:x+3].upper(), 'X') for x in range(frame, len(self)-1-frame, 3)]))
def gc_content(self, as_decimal=True):
    """Return the GC content of the sequence.

    Notes:
        N (in either case) is excluded from the sequence length. Other
        ambiguous bases are kept in the length, and of those only S
        (G or C) is counted as GC, so the result is conservative.

    Args:
        as_decimal (bool): If True (the default) return a fraction; if
            False return a percentage. E.g. for the sequence GCAT the
            result is 0.5 by default and 50.0 when set to False.

    Returns:
        float: The number of G, C and S divided by the number of non-N
        bases. Returns 0.0 when there are no non-N bases (empty or all-N
        sequence) instead of raising ZeroDivisionError.
    """
    ignored_bases = frozenset('nN')
    gc_bases = frozenset('cCgGsS')

    # Counter is case sensitive, which is why both cases are listed above.
    base_counts = Counter(self.seq)

    # N does not contribute to the length used as the denominator.
    num_bases = sum(
        count for base, count in base_counts.items()
        if base not in ignored_bases
    )
    if num_bases == 0:
        # Nothing to divide by: an empty or all-N sequence has no GC content.
        return 0.0

    gc_total = sum(
        count for base, count in base_counts.items()
        if base in gc_bases  # S is a G or C
    )
    gc_fraction = gc_total / num_bases

    return gc_fraction if as_decimal else gc_fraction * 100
class Embl(Fasta):
'''Exactly the same as Fasta, but reading seqs from a file works differently'''
......
......@@ -520,6 +520,19 @@ class TestFasta(unittest.TestCase):
fa = sequences.Fasta('name', 'A')
fa.split_capillary_id()
def test_gc_content(self):
    """gc_content returns the expected fraction and percentage."""
    # (sequence, expected GC fraction); N is ignored, S counts as GC.
    expected_by_seq = (
        ('cgCG', 1.0),
        ('tTaA', 0.0),
        ('GCAT', 0.5),
        ('GCATNN', 0.5),
        ('GCATNNS', 0.6),
        ('GCATNNSK', 0.5),
    )

    for seq, fraction in expected_by_seq:
        fasta = sequences.Fasta('ID', seq)
        self.assertAlmostEqual(fasta.gc_content(), fraction)
        self.assertAlmostEqual(fasta.gc_content(as_decimal=False), fraction * 100)
class TestEmbl(unittest.TestCase):
def test_get_id_from_header_line(self):
......
......@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name='pyfastaq',
version='3.16.0',
version='3.17.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
......