Skip to content
GitLab
Explore
Sign in
Register
Commits on Source (4)
New upstream version 2.13.1+ds
· 60af3c31
Sascha Steinbiss
authored
Nov 21, 2018
60af3c31
Merge tag 'upstream/2.13.1+ds'
· 000600c0
Sascha Steinbiss
authored
Nov 21, 2018
Upstream version 2.13.1+ds
000600c0
new upstream release
· c3a1e4af
Sascha Steinbiss
authored
Nov 21, 2018
c3a1e4af
new dependency
· 76cfa6e4
Sascha Steinbiss
authored
Nov 21, 2018
76cfa6e4
Show whitespace changes
Inline
Side-by-side
.travis.yml
View file @
76cfa6e4
...
...
@@ -8,9 +8,13 @@ addons:
-
libgfortran3
-
libncurses5-dev
python
:
-
'
3.
4
'
-
'
3.
5
'
sudo
:
false
install
:
-
source ./install_dependencies.sh
before_script
:
-
pip install codecov
script
:
-
python setup.py test
-
coverage run setup.py test
after_success
:
-
codecov
README.md
View file @
76cfa6e4
...
...
@@ -7,6 +7,11 @@ For how to use ARIBA, please see the [ARIBA wiki page][ARIBA wiki].
[

](https://travis-ci.org/sanger-pathogens/ariba)
[

](https://github.com/sanger-pathogens/ariba/blob/master/LICENSE)
[

](http://mgen.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000131)
[

](http://bioconda.github.io/recipes/ariba/README.html)
[

](https://quay.io/repository/biocontainers/ariba)
[

](https://hub.docker.com/r/sangerpathogens/ariba)
[

](https://hub.docker.com/r/sangerpathogens/ariba)
[

](https://codecov.io/gh/sanger-pathogens/ariba)
## Contents
*
[
Introduction
](
#introduction
)
...
...
_config.yml
0 → 100644
View file @
76cfa6e4
theme
:
jekyll-theme-cayman
\ No newline at end of file
ariba/__init__.py
View file @
76cfa6e4
...
...
@@ -49,6 +49,7 @@ __all__ = [
'
summary_cluster_variant
'
,
'
summary_sample
'
,
'
tasks
'
,
'
tb
'
,
'
versions
'
,
'
vfdb_parser
'
,
]
...
...
ariba/assembly.py
View file @
76cfa6e4
import
os
import
sys
import
shutil
import
pyfastaq
import
pymummer
import
fermilite_ariba
from
ariba
import
common
,
faidx
,
mapping
,
bam_parse
,
external_progs
,
ref_seq_chooser
from
ariba
import
common
,
mapping
,
bam_parse
,
external_progs
,
ref_seq_chooser
import
shlex
class
Error
(
Exception
):
pass
...
...
@@ -197,7 +196,7 @@ class Assembly:
self
.
assembled_ok
=
True
if
self
.
clean
:
print
(
'
Deleting assembly directory
'
,
self
.
assembler_dir
,
file
=
self
.
log_fh
)
shutil
.
rmtree
(
self
.
assembler_dir
,
ignore_errors
=
True
)
common
.
rmtree
(
self
.
assembler_dir
)
finally
:
os
.
chdir
(
cwd
)
...
...
ariba/cdhit.py
View file @
76cfa6e4
import
tempfile
import
shutil
import
sys
import
os
import
pyfastaq
...
...
@@ -152,6 +151,6 @@ class Runner:
common
.
syscall
(
cmd
,
verbose
=
self
.
verbose
)
clusters
=
self
.
_get_clusters_from_bak_file
(
cluster_info_outfile
,
self
.
min_cluster_number
)
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
return
clusters
ariba/cluster.py
View file @
76cfa6e4
...
...
@@ -4,10 +4,9 @@ import os
import
atexit
import
random
import
math
import
shutil
import
sys
import
pyfastaq
from
ariba
import
assembly
,
assembly_compare
,
assembly_variants
,
external_progs
,
flag
,
mapping
,
report
,
samtools_variants
from
ariba
import
assembly
,
assembly_compare
,
assembly_variants
,
common
,
external_progs
,
flag
,
mapping
,
report
,
samtools_variants
class
Error
(
Exception
):
pass
...
...
@@ -369,7 +368,7 @@ class Cluster:
self
.
_clean_file
(
self
.
reads_for_assembly2
)
if
self
.
clean
:
print
(
'
Deleting Assembly directory
'
,
self
.
assembly_dir
,
file
=
self
.
log_fh
,
flush
=
True
)
shutil
.
rmtree
(
self
.
assembly_dir
,
ignore_errors
=
True
)
common
.
rmtree
(
self
.
assembly_dir
)
if
self
.
assembled_ok
and
self
.
assembly
.
ref_seq_name
is
not
None
:
...
...
ariba/clusters.py
View file @
76cfa6e4
...
...
@@ -2,15 +2,15 @@ import signal
import
time
import
os
import
copy
import
json
import
tempfile
import
pickle
import
itertools
import
sys
import
shutil
import
multiprocessing
import
pyfastaq
import
minimap_ariba
from
ariba
import
cluster
,
common
,
histogram
,
mlst_reporter
,
read_store
,
report
,
report_filter
,
reference_data
from
ariba
import
cluster
,
common
,
histogram
,
mlst_reporter
,
read_store
,
report
,
report_filter
,
reference_data
,
tb
class
Error
(
Exception
):
pass
...
...
@@ -42,7 +42,7 @@ def _run_cluster(obj, verbose, clean, fails_dir, remaining_clusters, remaining_c
print
(
'
Deleting cluster dir
'
,
obj
.
root_dir
,
flush
=
True
)
if
os
.
path
.
exists
(
obj
.
root_dir
):
try
:
shutil
.
rmtree
(
obj
.
root_dir
)
common
.
rmtree
(
obj
.
root_dir
)
except
:
pass
...
...
@@ -106,6 +106,7 @@ class Clusters:
self
.
report_file_filtered
=
os
.
path
.
join
(
self
.
outdir
,
'
report.tsv
'
)
self
.
mlst_reports_prefix
=
os
.
path
.
join
(
self
.
outdir
,
'
mlst_report
'
)
self
.
mlst_profile_file
=
os
.
path
.
join
(
self
.
refdata_dir
,
'
pubmlst.profile.txt
'
)
self
.
tb_resistance_calls_file
=
os
.
path
.
join
(
self
.
outdir
,
'
tb.resistance.json
'
)
self
.
catted_assembled_seqs_fasta
=
os
.
path
.
join
(
self
.
outdir
,
'
assembled_seqs.fa.gz
'
)
self
.
catted_genes_matching_refs_fasta
=
os
.
path
.
join
(
self
.
outdir
,
'
assembled_genes.fa.gz
'
)
self
.
catted_assemblies_fasta
=
os
.
path
.
join
(
self
.
outdir
,
'
assemblies.fa.gz
'
)
...
...
@@ -227,12 +228,14 @@ class Clusters:
fasta_file
=
os
.
path
.
join
(
indir
,
'
02.cdhit.all.fa
'
)
metadata_file
=
os
.
path
.
join
(
indir
,
'
01.filter.check_metadata.tsv
'
)
info_file
=
os
.
path
.
join
(
indir
,
'
00.info.txt
'
)
parameters_file
=
os
.
path
.
join
(
indir
,
'
00.params.json
'
)
clusters_pickle_file
=
os
.
path
.
join
(
indir
,
'
02.cdhit.clusters.pickle
'
)
params
=
Clusters
.
_load_reference_data_info_file
(
info_file
)
refdata
=
reference_data
.
ReferenceData
(
[
fasta_file
],
[
metadata_file
],
genetic_code
=
params
[
'
genetic_code
'
],
parameters_file
=
parameters_file
,
)
with
open
(
clusters_pickle_file
,
'
rb
'
)
as
f
:
...
...
@@ -557,7 +560,7 @@ class Clusters:
def
_clean
(
self
):
if
self
.
clean
:
shutil
.
rmtree
(
self
.
fails_dir
,
ignore_errors
=
True
)
common
.
rmtree
(
self
.
fails_dir
)
try
:
self
.
tmp_dir_obj
.
cleanup
()
...
...
@@ -566,7 +569,7 @@ class Clusters:
if
self
.
verbose
:
print
(
'
Deleting Logs directory
'
,
self
.
logs_dir
)
shutil
.
rmtree
(
self
.
logs_dir
,
ignore_errors
=
True
)
common
.
rmtree
(
self
.
logs_dir
)
try
:
if
self
.
verbose
:
...
...
@@ -588,6 +591,13 @@ class Clusters:
reporter
.
run
()
@classmethod
def _write_tb_resistance_calls_json(cls, ariba_report_tsv, outfile):
    '''Writes resistance calls derived from an ARIBA report as JSON.

    ariba_report_tsv is handed to tb.report_to_resistance_dict(), and
    whatever that returns is dumped to outfile (opened for writing, so
    an existing file is overwritten) with sorted keys and an indent
    of 4.'''
    resistance_calls = tb.report_to_resistance_dict(ariba_report_tsv)

    with open(outfile, 'w') as json_fh:
        json.dump(resistance_calls, json_fh, indent=4, sort_keys=True)
def
write_versions_file
(
self
,
original_dir
):
with
open
(
'
version_info.txt
'
,
'
w
'
)
as
f
:
print
(
'
ARIBA run with this command:
'
,
file
=
f
)
...
...
@@ -668,6 +678,9 @@ class Clusters:
Clusters
.
_write_mlst_reports
(
self
.
mlst_profile_file
,
self
.
report_file_filtered
,
self
.
mlst_reports_prefix
,
verbose
=
self
.
verbose
)
if
'
tb
'
in
self
.
refdata
.
extra_parameters
and
self
.
refdata
.
extra_parameters
[
'
tb
'
]:
Clusters
.
_write_tb_resistance_calls_json
(
self
.
report_file_filtered
,
self
.
tb_resistance_calls_file
)
if
self
.
clusters_all_ran_ok
and
self
.
verbose
:
print
(
'
\n
All done!
\n
'
)
finally
:
...
...
ariba/common.py
View file @
76cfa6e4
...
...
@@ -73,3 +73,9 @@ def download_file(url, outfile, max_attempts=3, sleep_time=2, verbose=False):
if
verbose
:
print
(
'
done
'
,
flush
=
True
)
def rmtree(input_dir):
    '''Does rm -rf on input_dir. Meant to replace shutil.rmtree,
    which seems to be causing issues with files not getting deleted
    and the directory non-empty afterwards.

    input_dir is the path of the directory to delete. It is
    shell-quoted before being placed in the command, so a path
    containing spaces or shell metacharacters reaches rm as a single
    literal argument instead of being split or interpreted.

    Any error handling is whatever syscall() does with a failing
    command; nothing is caught here.
    '''
    # Imported locally because the module's top-of-file import block is
    # not visible from this part of the file.
    import shlex

    # NOTE(review): assumes syscall() passes this string to a shell (or
    # a shlex-style splitter) -- confirm against its definition.
    syscall('rm -rf ' + shlex.quote(input_dir))
ariba/megares_zip_parser.py
View file @
76cfa6e4
...
...
@@ -2,7 +2,6 @@ import os
import
sys
import
csv
import
zipfile
import
shutil
import
pyfastaq
from
ariba
import
common
...
...
@@ -39,7 +38,7 @@ class MegaresZipParser:
zfile
.
extract
(
member
,
path
=
outdir
)
if
None
in
original_files
.
values
():
shutil
.
rmtree
(
outdir
)
common
.
rmtree
(
outdir
)
raise
Error
(
'
Error. Not all expected files found in downloaded megares zipfile.
'
+
str
(
original_files
))
return
original_files
...
...
@@ -114,6 +113,6 @@ class MegaresZipParser:
sequences
=
{}
pyfastaq
.
tasks
.
file_to_dict
(
os
.
path
.
join
(
tmpdir
,
original_files
[
'
fasta
'
]),
sequences
)
MegaresZipParser
.
_write_files
(
self
.
outprefix
,
sequences
,
annotation_data
,
header_data
)
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
os
.
unlink
(
self
.
zip_file
)
ariba/mic_plotter.py
View file @
76cfa6e4
...
...
@@ -12,7 +12,7 @@ import matplotlib.gridspec as gridspec
import
matplotlib.cm
as
cmx
import
math
import
pyfastaq
from
ariba
import
common
,
reference_data
from
ariba
import
reference_data
class
Error
(
Exception
):
pass
...
...
ariba/mlst_reporter.py
View file @
76cfa6e4
import
os
import
pyfastaq
from
ariba
import
mlst_profile
,
summary_sample
...
...
ariba/pubmlst_getter.py
View file @
76cfa6e4
...
...
@@ -2,10 +2,10 @@ import tempfile
import
re
import
time
import
os
import
shutil
import
urllib.request
import
xml.etree.ElementTree
as
ET
import
pyfastaq
from
ariba
import
common
class
Error
(
Exception
):
pass
...
...
@@ -29,7 +29,7 @@ class PubmlstGetter:
xml_tree
=
ET
.
parse
(
xml_file
)
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
return
xml_tree
...
...
ariba/read_filter.py
View file @
76cfa6e4
import
os
import
tempfile
import
shutil
from
ariba
import
common
,
external_progs
...
...
@@ -79,5 +78,5 @@ class ReadFilter:
wanted_ids
=
wanted_read_ids
)
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
return
total_reads
,
total_bases
ariba/ref_genes_getter.py
View file @
76cfa6e4
...
...
@@ -2,10 +2,8 @@ class Error (Exception): pass
import
os
import
re
import
shutil
import
tarfile
import
pyfastaq
import
time
import
json
import
subprocess
import
sys
...
...
@@ -177,7 +175,7 @@ class RefGenesGetter:
pyfastaq
.
utils
.
close
(
f_out_log
)
os
.
chdir
(
current_dir
)
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
print
(
'
Extracted data and written ARIBA input files
\n
'
)
print
(
'
Finished. Final files are:
'
,
final_fasta
,
final_tsv
,
sep
=
'
\n\t
'
,
end
=
'
\n\n
'
)
...
...
@@ -233,7 +231,9 @@ class RefGenesGetter:
for
filename
in
os
.
listdir
():
if
filename
.
endswith
(
'
.fsa
'
):
print
(
'
'
,
filename
)
file_reader
=
pyfastaq
.
sequences
.
file_reader
(
filename
)
fix_file
=
os
.
path
.
join
(
tmpdir
,
filename
+
'
.fix.fsa
'
)
RefGenesGetter
.
_fix_virulencefinder_fasta_file
(
os
.
path
.
join
(
tmpdir
,
filename
),
fix_file
)
file_reader
=
pyfastaq
.
sequences
.
file_reader
(
fix_file
)
for
seq
in
file_reader
:
try
:
prefix
,
suffix
=
seq
.
id
.
split
(
'
_
'
,
maxsplit
=
1
)
...
...
@@ -257,7 +257,7 @@ class RefGenesGetter:
print
(
'
\n
Finished combining files
\n
'
)
os
.
chdir
(
current_dir
)
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
print
(
'
Finished. Final files are:
'
,
final_fasta
,
final_tsv
,
sep
=
'
\n\t
'
,
end
=
'
\n\n
'
)
print
(
'
You can use them with ARIBA like this:
'
)
print
(
'
ariba prepareref -f
'
,
final_fasta
,
'
-m
'
,
final_tsv
,
'
output_directory
\n
'
)
...
...
@@ -300,7 +300,7 @@ class RefGenesGetter:
pyfastaq
.
utils
.
close
(
f_out_tsv
)
pyfastaq
.
utils
.
close
(
f_out_fa
)
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
print
(
'
Finished. Final files are:
'
,
final_fasta
,
final_tsv
,
sep
=
'
\n\t
'
,
end
=
'
\n\n
'
)
print
(
'
You can use them with ARIBA like this:
'
)
...
...
@@ -372,7 +372,7 @@ class RefGenesGetter:
print
(
'
\n
Finished combining files
\n
'
)
os
.
chdir
(
current_dir
)
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
print
(
'
Finished. Final files are:
'
,
final_fasta
,
final_tsv
,
sep
=
'
\n\t
'
,
end
=
'
\n\n
'
)
print
(
'
You can use them with ARIBA like this:
'
)
print
(
'
ariba prepareref -f
'
,
final_fasta
,
'
-m
'
,
final_tsv
,
'
output_directory
\n
'
)
...
...
@@ -446,7 +446,7 @@ class RefGenesGetter:
vparser
=
vfdb_parser
.
VfdbParser
(
zipfile
,
outprefix
)
vparser
.
run
()
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
print
(
'
done
'
)
final_fasta
=
outprefix
+
'
.fa
'
final_tsv
=
outprefix
+
'
.tsv
'
...
...
@@ -534,7 +534,7 @@ class RefGenesGetter:
print
(
'
\n
Finished combining files
\n
'
)
os
.
chdir
(
current_dir
)
if
not
self
.
debug
:
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
print
(
'
Finished. Final files are:
'
,
final_fasta
,
final_tsv
,
sep
=
'
\n\t
'
,
end
=
'
\n\n
'
)
print
(
'
You can use them with ARIBA like this:
'
)
print
(
'
ariba prepareref -f
'
,
final_fasta
,
'
-m
'
,
final_tsv
,
'
output_directory
\n
'
)
...
...
ariba/ref_preparer.py
View file @
76cfa6e4
import
sys
import
os
import
shutil
import
pickle
import
pyfastaq
from
ariba
import
reference_data
from
ariba
import
common
,
reference_data
class
Error
(
Exception
):
pass
...
...
@@ -140,7 +139,7 @@ class RefPreparer:
original_dir
=
os
.
getcwd
()
if
self
.
force
and
os
.
path
.
exists
(
outdir
):
shutil
.
rmtree
(
outdir
)
common
.
rmtree
(
outdir
)
if
os
.
path
.
exists
(
outdir
):
raise
Error
(
'
Error! Output directory
'
+
outdir
+
'
already exists. Cannot continue
'
)
...
...
ariba/ref_seq_chooser.py
View file @
76cfa6e4
...
...
@@ -3,7 +3,7 @@ import copy
import
os
import
pymummer
import
pyfastaq
import
shutil
from
ariba
import
common
class
Error
(
Exception
):
pass
...
...
@@ -147,7 +147,7 @@ class RefSeqChooser:
maxmatch
=
True
,
).
run
()
nucmer_matches
=
RefSeqChooser
.
_load_nucmer_coords_file
(
coords_file
,
log_fh
=
log_fh
)
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
if
len
(
nucmer_matches
)
==
0
:
return
None
,
{}
...
...
@@ -172,7 +172,7 @@ class RefSeqChooser:
print
(
'
Checking for a better match to a ref sequence outside the cluster
'
,
file
=
self
.
log_fh
)
best_hit_from_all_seqs
,
not_needed
=
RefSeqChooser
.
_closest_nucmer_match_between_fastas
(
self
.
all_refs_fasta
,
pieces_fasta_file
,
self
.
log_fh
,
self
.
nucmer_min_id
,
self
.
nucmer_min_len
,
self
.
nucmer_breaklen
,
True
,
False
)
shutil
.
rmtree
(
tmpdir
)
common
.
rmtree
(
tmpdir
)
self
.
closest_ref_from_all_refs
=
best_hit_from_all_seqs
.
ref_name
if
self
.
closest_ref_from_all_refs
is
None
:
return
...
...
ariba/reference_data.py
View file @
76cfa6e4
import
json
import
os
import
sys
import
re
...
...
@@ -19,6 +20,7 @@ class ReferenceData:
min_gene_length
=
6
,
max_gene_length
=
10000
,
genetic_code
=
11
,
parameters_file
=
None
,
):
self
.
seq_filenames
=
{}
self
.
seq_dicts
=
{}
...
...
@@ -38,6 +40,12 @@ class ReferenceData:
else
:
self
.
ariba_to_original_name
=
ReferenceData
.
_load_rename_file
(
rename_file
)
if
parameters_file
is
None
or
not
os
.
path
.
exists
(
parameters_file
):
self
.
extra_parameters
=
{}
else
:
with
open
(
parameters_file
)
as
f
:
self
.
extra_parameters
=
json
.
load
(
f
)
@classmethod
def
_load_rename_file
(
cls
,
filename
):
...
...
ariba/report_flag_expander.py
View file @
76cfa6e4
import
copy
import
sys
import
pyfastaq
from
ariba
import
flag
...
...
ariba/summary_cluster.py
View file @
76cfa6e4
import
sys
from
ariba
import
flag
,
report
,
summary_cluster_variant
class
Error
(
Exception
):
pass
...
...
Prev
1
2
3
4
Next