Skip to content
Commits on Source (6)
......@@ -5,7 +5,7 @@ Antimicrobial Resistance Identification By Assembly
For how to use ARIBA, please see the [ARIBA wiki page][ARIBA wiki].
[![Build Status](https://travis-ci.org/sanger-pathogens/ariba.svg?branch=master)](https://travis-ci.org/sanger-pathogens/ariba)
[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-brightgreen.svg)](https://github.com/ssjunnebo/ariba/blob/master/LICENSE)
[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-brightgreen.svg)](https://github.com/sanger-pathogens/ariba/blob/master/LICENSE)
[![status](https://img.shields.io/badge/MGEN-10.1099%2Fmgen.0.000131-brightgreen.svg)](http://mgen.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000131)
## Contents
......
......@@ -59,7 +59,7 @@ class Assembly:
self.threads = threads
if extern_progs is None:
self.extern_progs = external_progs.ExternalProgs()
self.extern_progs = external_progs.ExternalProgs(using_spades=self.assembler == 'spades')
else:
self.extern_progs = extern_progs
......
......@@ -27,7 +27,7 @@ class Runner:
self.length_diff_cutoff = length_diff_cutoff
self.verbose = verbose
self.min_cluster_number = min_cluster_number
extern_progs = external_progs.ExternalProgs(fail_on_error=True)
extern_progs = external_progs.ExternalProgs(fail_on_error=True, using_spades=False)
self.cd_hit_est = extern_progs.exe('cdhit')
......
......@@ -130,7 +130,7 @@ class Cluster:
self.log_fh = None
if extern_progs is None:
self.extern_progs = external_progs.ExternalProgs()
self.extern_progs = external_progs.ExternalProgs(using_spades=self.assembler == 'spades')
else:
self.extern_progs = extern_progs
......
......@@ -20,10 +20,19 @@ prog_to_default = {
prog_to_env_var = {x: 'ARIBA_' + x.upper() for x in prog_to_default if x not in {'nucmer'}}
# Nucmer 3.1 'nucmer --version' outputs this:
# nucmer
# NUCmer (NUCleotide MUMmer) version 3.1
#
# Nucmer 4 'nucmer --version' outputs this:
# 4.0.0beta2
#
# ... make the regex permissive and hope things
# still work for later versions
# Maps each program name to a tuple: (command-line option that makes the
# program report its version, compiled regex whose group 1 is the version).
# The patterns are raw strings so that regex escapes such as \. \( and \s
# are passed to the re module untouched instead of being treated as
# (invalid) Python string escapes.
# NOTE: 'nucmer' appears twice; in a dict literal the later entry wins, so
# the permissive pattern is the one in effect.
prog_to_version_cmd = {
    'bowtie2': ('--version', re.compile(r'.*bowtie2.*version (.*)$')),
    'cdhit': ('', re.compile(r'CD-HIT version ([0-9\.]+) \(')),
    'nucmer': ('--version', re.compile(r'^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
    'nucmer': ('--version', re.compile(r'([0-9]+\.[0-9\.]+.*$)')),
    'spades': ('--version', re.compile(r'SPAdes\s+v([0-9\.]+)'))
}
......@@ -40,11 +49,12 @@ prog_optional = set([
])
class ExternalProgs:
def __init__(self, verbose=False, fail_on_error=True):
def __init__(self, verbose=False, fail_on_error=True, using_spades=False):
self.progs = {}
self.version_report = []
self.all_deps_ok = True
self.versions = {}
self.using_spades = using_spades
if verbose:
print('{:_^79}'.format(' Checking dependencies and their versions '))
......@@ -53,6 +63,9 @@ class ExternalProgs:
warnings = []
for prog in sorted(prog_to_default):
if prog == 'spades' and not self.using_spades:
continue
msg_sink = errors
if prog in prog_optional:
msg_sink = warnings
......
......@@ -20,7 +20,7 @@ class ReadFilter:
self.log_fh = log_fh
if extern_progs is None:
self.extern_progs = external_progs.ExternalProgs()
self.extern_progs = external_progs.ExternalProgs(using_spades=False)
else:
self.extern_progs = extern_progs
......
......@@ -7,6 +7,8 @@ import tarfile
import pyfastaq
import time
import json
import subprocess
import sys
from ariba import common, card_record, vfdb_parser, megares_data_finder, megares_zip_parser
......@@ -186,6 +188,19 @@ class RefGenesGetter:
print('and in your methods say that version', self.version, 'of the database was used')
@classmethod
def _get_genetic_epi_database_from_bitbucket(cls, db_name, outdir, git_commit=None):
assert db_name in {'plasmidfinder', 'resfinder', 'virulence_finder'}
cmd = 'git clone ' + 'https://bitbucket.org/genomicepidemiology/' + db_name + '_db.git ' + outdir
common.syscall(cmd)
if git_commit is not None:
common.syscall('cd ' + outdir + ' && git checkout ' + git_commit)
print('Using this git commit for ' + db_name + ' database:')
subprocess.check_call('cd ' + outdir + ' && git log -n 1', shell=True)
def _get_from_resfinder(self, outprefix):
outprefix = os.path.abspath(outprefix)
final_fasta = outprefix + '.fa'
......@@ -193,6 +208,7 @@ class RefGenesGetter:
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()
if self.version =='old':
try:
os.mkdir(tmpdir)
os.chdir(tmpdir)
......@@ -204,6 +220,10 @@ class RefGenesGetter:
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)
else:
RefGenesGetter._get_genetic_epi_database_from_bitbucket('resfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
......@@ -222,7 +242,7 @@ class RefGenesGetter:
except:
description = '.'
# names are not unique across the files
# names are not unique across the files
if seq.id in used_names:
used_names[seq.id] += 1
seq.id += '_' + str(used_names[seq.id])
......@@ -310,6 +330,7 @@ class RefGenesGetter:
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()
if self.version == 'old':
try:
os.mkdir(tmpdir)
os.chdir(tmpdir)
......@@ -321,6 +342,9 @@ class RefGenesGetter:
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)
else:
RefGenesGetter._get_genetic_epi_database_from_bitbucket('plasmidfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
......@@ -357,8 +381,13 @@ class RefGenesGetter:
def _get_from_srst2_argannot(self, outprefix):
srst2_version = '0.2.0'
srst2_url = 'https://github.com/katholt/srst2/raw/v' + srst2_version + '/data/ARGannot.r1.fasta'
if self.version is None:
self.version = 'r2'
if self.version not in {'r1', 'r2'}:
raise Error('srst2_argannot version must be r1 or r2. Got this: ' + self.version)
version_string = '.r1' if self.version == 'r1' else '_r2'
srst2_url = 'https://raw.githubusercontent.com/katholt/srst2/master/data/ARGannot' + version_string + '.fasta'
srst2_fa = outprefix + '.original.fa'
command = 'wget -O ' + srst2_fa + ' ' + srst2_url
common.syscall(command, verbose=True)
......@@ -389,7 +418,9 @@ class RefGenesGetter:
print('If you use this downloaded data, please cite:')
print('"SRST2: Rapid genomic surveillance for public health and hospital microbiology labs",\nInouye et al 2014, Genome Medicine, PMID: 25422674\n')
print(argannot_ref)
print('and in your methods say that the ARG-ANNOT sequences were used from version', srst2_version, 'of SRST2.')
# Used to also output the version of SRST2 here, but the r2 version of their
# fasta file was made after SRST2 release 0.2.0. At the time of writing this,
# 0.2.0 is the latest release, ie r2 isn't in an SRST2 release.
def _get_from_vfdb_core(self, outprefix):
......@@ -427,6 +458,31 @@ class RefGenesGetter:
print('"VFDB 2016: hierarchical and refined dataset for big data analysis-10 years on",\nChen LH et al 2016, Nucleic Acids Res. 44(Database issue):D694-D697. PMID: 26578559\n')
@classmethod
def _fix_virulencefinder_fasta_file(cls, infile, outfile):
'''Some line breaks are missing in the FASTA files from
viruslence finder. Which means there are lines like this:
AAGATCCAATAACTGAAGATGTTGAACAAACAATTCATAATATTTATGGTCAATATGCTATTTTCGTTGA
AGGTGTTGCGCATTTACCTGGACATCTCTCTCCATTATTAAAAAAATTACTACTTAAATCTTTATAA>coa:1:BA000018.3
ATGAAAAAGCAAATAATTTCGCTAGGCGCATTAGCAGTTGCATCTAGCTTATTTACATGGGATAACAAAG
and therefore the sequences are messed up when we parse them. Also
one has a > at the end, then the seq name on the next line.
This function fixes the file by adding line breaks'''
with open(infile) as f_in, open(outfile, 'w') as f_out:
for line in f_in:
if line.startswith('>') or '>' not in line:
print(line, end='', file=f_out)
elif line.endswith('>\n'):
print('WARNING: found line with ">" at the end! Fixing. Line:' + line.rstrip() + ' in file ' + infile, file=sys.stderr)
print(line.rstrip('>\n'), file=f_out)
print('>', end='', file=f_out)
else:
print('WARNING: found line with ">" not at the start! Fixing. Line:' + line.rstrip() + ' in file ' + infile, file=sys.stderr)
line1, line2 = line.split('>')
print(line1, file=f_out)
print('>', line2, sep='', end='', file=f_out)
def _get_from_virulencefinder(self, outprefix):
outprefix = os.path.abspath(outprefix)
final_fasta = outprefix + '.fa'
......@@ -434,6 +490,7 @@ class RefGenesGetter:
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()
if self.version == 'old':
try:
os.mkdir(tmpdir)
os.chdir(tmpdir)
......@@ -445,6 +502,9 @@ class RefGenesGetter:
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)
else:
# Clone the virulence_finder database (one of the names accepted by
# _get_genetic_epi_database_from_bitbucket), since this is the
# virulencefinder download.
RefGenesGetter._get_genetic_epi_database_from_bitbucket('virulence_finder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
......@@ -454,7 +514,9 @@ class RefGenesGetter:
for filename in os.listdir(tmpdir):
if filename.endswith('.fsa'):
print(' ', filename)
file_reader = pyfastaq.sequences.file_reader(os.path.join(tmpdir, filename))
fix_file = os.path.join(tmpdir, filename + '.fix.fsa')
RefGenesGetter._fix_virulencefinder_fasta_file(os.path.join(tmpdir, filename), fix_file)
file_reader = pyfastaq.sequences.file_reader(fix_file)
for seq in file_reader:
original_id = seq.id
seq.id = seq.id.replace('_', '.', 1)
......
......@@ -6,7 +6,7 @@ def run(options):
if options.no_cdhit and options.cdhit_clusters is not None:
sys.exit('Cannot use both --no_cdhit and --cdhit_clusters. Neither or exactly one of those options must be used')
extern_progs, version_report_lines = versions.get_all_versions()
extern_progs, version_report_lines = versions.get_all_versions(using_spades=False)
if options.verbose:
print(*version_report_lines, sep='\n')
......
......@@ -35,7 +35,7 @@ def run(options):
print('Output directory already exists. ARIBA makes the output directory. Cannot continue.', file=sys.stderr)
sys.exit(1)
extern_progs, version_report_lines = ariba.versions.get_all_versions()
extern_progs, version_report_lines = ariba.versions.get_all_versions(using_spades=options.assembler == 'spades')
if options.verbose:
print(*version_report_lines, sep='\n')
......
......@@ -9,7 +9,7 @@ from ariba import external_progs
modules_dir = os.path.dirname(os.path.abspath(assembly.__file__))
data_dir = os.path.join(modules_dir, 'tests', 'data')
extern_progs = external_progs.ExternalProgs()
extern_progs = external_progs.ExternalProgs(using_spades=True)
class TestAssembly(unittest.TestCase):
def test_run_fermilite(self):
......
>seq1
ACGT
A
>seq2
AGT
AC>seq3
ACGT
>seq4
AACGT>
seq5
AAC
>seq1
ACGT
A
>seq2
AGT
AC
>seq3
ACGT
>seq4
AACGT
>seq5
AAC
import unittest
import os
import filecmp
from ariba import ref_genes_getter
modules_dir = os.path.dirname(os.path.abspath(ref_genes_getter.__file__))
data_dir = os.path.join(modules_dir, 'tests', 'data')
class TestRefGenesGetter(unittest.TestCase):
    def test_fix_virulencefinder_fasta_file(self):
        '''test _fix_virulencefinder_fasta_file'''
        infile = os.path.join(data_dir, 'ref_genes_getter.fix_virulencefinder_fasta_file.in.fa')
        expected_file = os.path.join(data_dir, 'ref_genes_getter.fix_virulencefinder_fasta_file.out.fa')
        tmp_file = 'tmp.test.ref_genes_getter.fix_virulencefinder_fasta_file.out.fa'
        try:
            ref_genes_getter.RefGenesGetter._fix_virulencefinder_fasta_file(infile, tmp_file)
            self.assertTrue(filecmp.cmp(expected_file, tmp_file, shallow=False))
        finally:
            # Clean up the output file even if the comparison (or the
            # fix itself) fails, so no stray file is left in the cwd
            if os.path.exists(tmp_file):
                os.unlink(tmp_file)
......@@ -17,8 +17,8 @@ package_max_versions = {
}
def get_all_versions(raise_error=True):
extern_progs = external_progs.ExternalProgs(fail_on_error=False)
def get_all_versions(raise_error=True, using_spades=True):
extern_progs = external_progs.ExternalProgs(fail_on_error=False, using_spades=using_spades)
report_lines = [
'ARIBA version: ' + ariba_version,
......
ariba (2.12.0+ds-1) unstable; urgency=medium
* New upstream release.
* Remove obsolete python version hint in d/control.
* Bump to debhelper 11.
-- Sascha Steinbiss <satta@debian.org> Tue, 15 May 2018 17:08:54 +0200
ariba (2.11.1+ds-3) unstable; urgency=medium
[ Steffen Möller ]
......
......@@ -3,7 +3,7 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Sascha Steinbiss <satta@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11),
python3,
python3-all,
python3-dev,
......@@ -25,7 +25,6 @@ Build-Depends: debhelper (>= 10),
help2man,
asciidoctor
Standards-Version: 4.1.4
X-Python3-Version: >= 3.2
Vcs-Browser: https://salsa.debian.org/med-team/ariba
Vcs-Git: https://salsa.debian.org/med-team/ariba.git
Homepage: https://github.com/sanger-pathogens/ariba
......
......@@ -62,7 +62,7 @@ subparser_getref = subparsers.add_parser(
description='Download reference data from one of a few supported public resources',
)
subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files')
subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card and megares')
# Help text names the literal value "old" (no trailing space), which is the value the version option is compared against
subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Applies to: card, megares, plasmidfinder, resfinder, srst2_argannot, virulencefinder. For plasmid/res/virulencefinder: default is to get latest from bitbucket - supply git commit hash to get a specific version from bitbucket, or use "old" to get from old website. For srst2_argannot: default is latest version r2, use r1 to get the older version')
subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
subparser_getref.set_defaults(func=ariba.tasks.getref.run)
......
......@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
setup(
ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
name='ariba',
version='2.11.1',
version='2.12.0',
description='ARIBA: Antibiotic Resistance Identification By Assembly',
packages = find_packages(),
package_data={'ariba': ['test_run_data/*']},
......