Skip to content
Commits on Source (8)
Metadata-Version: 1.0 Metadata-Version: 1.0
Name: cgecore Name: cgecore
Version: 1.4.4 Version: 1.5.0
Summary: Center for Genomic Epidemiology Core Module Summary: Center for Genomic Epidemiology Core Module
Home-page: https://bitbucket.org/genomicepidemiology/cge_core_module Home-page: https://bitbucket.org/genomicepidemiology/cge_core_module
Author: Center for Genomic Epidemiology Author: Center for Genomic Epidemiology
...@@ -24,10 +24,12 @@ Description: # cge_core_module ...@@ -24,10 +24,12 @@ Description: # cge_core_module
# Install package locally # Install package locally
python2 setup.py install python2 setup.py install
python3 setup.py install python3 setup.py install
# Distribute to PyPi # Distribute to PyPi
python3 setup.py sdist bdist_wheel python3 setup.py sdist bdist_wheel
twine upload dist/* twine upload dist/*
*deprecated:* *deprecated:*
......
...@@ -16,10 +16,12 @@ https://pypi.org/project/cgecore/ ...@@ -16,10 +16,12 @@ https://pypi.org/project/cgecore/
# Install package locally # Install package locally
python2 setup.py install python2 setup.py install
python3 setup.py install python3 setup.py install
# Distribute to PyPi # Distribute to PyPi
python3 setup.py sdist bdist_wheel python3 setup.py sdist bdist_wheel
twine upload dist/* twine upload dist/*
*deprecated:* *deprecated:*
......
Metadata-Version: 1.0 Metadata-Version: 1.0
Name: cgecore Name: cgecore
Version: 1.4.4 Version: 1.5.0
Summary: Center for Genomic Epidemiology Core Module Summary: Center for Genomic Epidemiology Core Module
Home-page: https://bitbucket.org/genomicepidemiology/cge_core_module Home-page: https://bitbucket.org/genomicepidemiology/cge_core_module
Author: Center for Genomic Epidemiology Author: Center for Genomic Epidemiology
...@@ -24,10 +24,12 @@ Description: # cge_core_module ...@@ -24,10 +24,12 @@ Description: # cge_core_module
# Install package locally # Install package locally
python2 setup.py install python2 setup.py install
python3 setup.py install python3 setup.py install
# Distribute to PyPi # Distribute to PyPi
python3 setup.py sdist bdist_wheel python3 setup.py sdist bdist_wheel
twine upload dist/* twine upload dist/*
*deprecated:* *deprecated:*
......
...@@ -13,7 +13,7 @@ from .argumentparsing import (check_file_type, get_arguments, get_string, ...@@ -13,7 +13,7 @@ from .argumentparsing import (check_file_type, get_arguments, get_string,
) )
##################### #####################
__version__ = "1.4.4" __version__ = "1.5.0"
__all__ = [ __all__ = [
"argumentparsing", "argumentparsing",
"cmdline", "cmdline",
......
...@@ -40,7 +40,8 @@ class Blaster(): ...@@ -40,7 +40,8 @@ class Blaster():
os.chmod(tmp_out_path, 0o775) os.chmod(tmp_out_path, 0o775)
# Running blast # Running blast
if (os.path.isfile(out_file) and os.access(out_file, os.R_OK) if (os.path.isfile(out_file)
and os.access(out_file, os.R_OK)
and reuse_results): and reuse_results):
print("Found " + out_file + " skipping DB.") print("Found " + out_file + " skipping DB.")
out, err = (b'', b'') out, err = (b'', b'')
...@@ -50,7 +51,8 @@ class Blaster(): ...@@ -50,7 +51,8 @@ class Blaster():
" -dust 'no'" % (blast, db_file, inputfile, " -dust 'no'" % (blast, db_file, inputfile,
out_file, threshold, max_target_seqs)) out_file, threshold, max_target_seqs))
process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, process = subprocess.Popen(cmd, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
out, err = process.communicate() out, err = process.communicate()
...@@ -59,18 +61,18 @@ class Blaster(): ...@@ -59,18 +61,18 @@ class Blaster():
# Test if output file exist # Test if output file exist
result_handle = open(out_file, "r") result_handle = open(out_file, "r")
except IOError: except IOError:
sys.exit(("Error: BLAST did not run as expected. " + sys.exit(("Error: BLAST did not run as expected. "
"The expected output file, {}, did not exist.\n" + "The expected output file, {}, did not exist.\n"
"BLAST finished with the following response:" + "BLAST finished with the following response:"
"\n{}\n{}").format(os.path.abspath(out_file), "\n{}\n{}").format(os.path.abspath(out_file),
out.decode("utf-8"), out.decode("utf-8"),
err.decode("utf-8"))) err.decode("utf-8")))
# Test if blast output is empty # Test if blast output is empty
if os.stat(out_file).st_size == 0: if os.stat(out_file).st_size == 0:
sys.exit(("Error: BLAST did not run as expected. " + sys.exit(("Error: BLAST did not run as expected. "
"The output file {} was empty.\n" + "The output file {} was empty.\n"
"BLAST finished with the following response:" + "BLAST finished with the following response:"
"\n{}\n{}").format(os.path.abspath(out_file), "\n{}\n{}").format(os.path.abspath(out_file),
out.decode("utf-8"), out.decode("utf-8"),
err.decode("utf-8"))) err.decode("utf-8")))
...@@ -96,12 +98,23 @@ class Blaster(): ...@@ -96,12 +98,23 @@ class Blaster():
# Parsing over the hits and only keeping the best # Parsing over the hits and only keeping the best
for blast_record in blast_records: for blast_record in blast_records:
# OLD CODE TO BE REMOVED
# query = blast_record.query
# blast_record.alignments.sort(key=lambda align: (
# -max((len(hsp.query)
# * (int(hsp.identities) / float(len(hsp.query)))
# for hsp in align.hsps)))
# )
# Sort BLAST alignments by the hsp in each alignment with the
# highest number of identical nucleotides (hsp.identities)
blast_record.alignments.sort(
key=lambda align: (max((int(hsp.identities)
for hsp in align.hsps))),
reverse=True)
query = blast_record.query query = blast_record.query
blast_record.alignments.sort(key=lambda align: (
-max((len(hsp.query)
* (int(hsp.identities) / float(len(hsp.query)))
for hsp in align.hsps)))
)
for alignment in blast_record.alignments: for alignment in blast_record.alignments:
# Setting the e-value as 1 and bit as 0 to get the best # Setting the e-value as 1 and bit as 0 to get the best
...@@ -117,6 +130,8 @@ class Blaster(): ...@@ -117,6 +130,8 @@ class Blaster():
tmp = alignment.title.split(" ") tmp = alignment.title.split(" ")
sbjct_header = tmp[1] sbjct_header = tmp[1]
# DEBUG
print("Found: {}".format(sbjct_header))
bit = hsp.bits bit = hsp.bits
sbjct_length = alignment.length sbjct_length = alignment.length
sbjct_start = hsp.sbjct_start sbjct_start = hsp.sbjct_start
...@@ -129,7 +144,8 @@ class Blaster(): ...@@ -129,7 +144,8 @@ class Blaster():
query_start = hsp.query_start query_start = hsp.query_start
query_end = hsp.query_end query_end = hsp.query_end
HSP_length = len(query_string) HSP_length = len(query_string)
perc_ident = int(hsp.identities) / float(HSP_length) * 100 perc_ident = (int(hsp.identities)
/ float(HSP_length) * 100)
strand = 0 strand = 0
coverage = ((int(HSP_length) - int(gaps)) coverage = ((int(HSP_length) - int(gaps))
/ float(sbjct_length)) / float(sbjct_length))
...@@ -139,9 +155,9 @@ class Blaster(): ...@@ -139,9 +155,9 @@ class Blaster():
# cal_score is later used to select the best hit # cal_score is later used to select the best hit
cal_score = perc_ident * coverage cal_score = perc_ident * coverage
hit_id = "%s:%s..%s:%s:%f" % (contig_name, query_start, hit_id = "%s:%s..%s:%s:%f" % (
query_end, sbjct_header, contig_name, query_start, query_end,
cal_score) sbjct_header, cal_score)
# If the hit is on the other strand # If the hit is on the other strand
if sbjct_start > sbjct_end: if sbjct_start > sbjct_end:
...@@ -149,13 +165,17 @@ class Blaster(): ...@@ -149,13 +165,17 @@ class Blaster():
sbjct_start = sbjct_end sbjct_start = sbjct_end
sbjct_end = tmp sbjct_end = tmp
query_string = self.reversecomplement(query_string) query_string = self.reversecomplement(
query_string)
homo_string = homo_string[::-1] homo_string = homo_string[::-1]
sbjct_string = self.reversecomplement(sbjct_string) sbjct_string = self.reversecomplement(
sbjct_string)
strand = 1 strand = 1
# Save hit # Save hit
if (cut_off and perc_coverage > 20) or cut_off == False: if((cut_off and perc_coverage > 20)
or cut_off is False):
best_hsp = {'evalue': hsp.expect, best_hsp = {'evalue': hsp.expect,
'sbjct_header': sbjct_header, 'sbjct_header': sbjct_header,
'bit': bit, 'bit': bit,
...@@ -188,13 +208,18 @@ class Blaster(): ...@@ -188,13 +208,18 @@ class Blaster():
tmp_results = gene_results tmp_results = gene_results
# Compare the hit results # Compare the hit results
save, gene_split, gene_results = self.compare_results( save, gene_split, gene_results = (
save, best_hsp, tmp_results, tmp_gene_split, self.compare_results(save, best_hsp,
tmp_results,
tmp_gene_split,
allowed_overlap) allowed_overlap)
)
# If the hit is not overlapping with other hit # If the hit is not overlapping with other hit
# seqeunces it is kept # seqeunces it is kept
if save == 1: if save == 1:
# DEBUG
print("Saving: {}".format(hit_id))
gene_results[hit_id] = best_hsp gene_results[hit_id] = best_hsp
result_handle.close() result_handle.close()
...@@ -344,14 +369,19 @@ class Blaster(): ...@@ -344,14 +369,19 @@ class Blaster():
# sequnce but match different genes only the best hit is # sequnce but match different genes only the best hit is
# kept # kept
if new_contig == old_contig: if new_contig == old_contig:
print("Same contig: {} == {}".format(new_contig, old_contig))
print("\t{} vs {}".format(new_db_hit, old_db_hit))
# Check if saved hits overlaps with current hit # Check if saved hits overlaps with current hit
overlap_len = (((old_end_query - old_start_query) hit_union_length = (max(old_end_query, new_end_query)
- min(old_start_query, new_start_query))
hit_lengths_sum = ((old_end_query - old_start_query)
+ (new_end_query - new_start_query)) + (new_end_query - new_start_query))
- (max(old_end_query, new_end_query) overlap_len = (hit_lengths_sum - hit_union_length)
- min(old_start_query, new_start_query)))
if overlap_len < allowed_overlap: if overlap_len < allowed_overlap:
print("\tignore overlap ({}): {}".format(overlap_len, new_db_hit))
continue continue
print("\toverlap found ({}): {}".format(overlap_len, new_db_hit))
# If the two hits cover the exact same place on the # If the two hits cover the exact same place on the
# contig only the percentage of identity is compared # contig only the percentage of identity is compared
...@@ -388,10 +418,9 @@ class Blaster(): ...@@ -388,10 +418,9 @@ class Blaster():
break break
# If new hit overlaps with the saved hit # If new hit overlaps with the saved hit
elif((max(old_end_query, new_end_query) elif(hit_union_length <= hit_lengths_sum):
- min(old_start_query, new_start_query)) print("\t{} <= {}".format(hit_union_length, hit_lengths_sum))
<= ((old_end_query - old_start_query) print("\t\tScores: {} cmp {}".format(new_score, old_score))
+ (new_end_query - new_start_query))):
if new_score > old_score: if new_score > old_score:
# Set to remove old gene # Set to remove old gene
...@@ -404,7 +433,7 @@ class Blaster(): ...@@ -404,7 +433,7 @@ class Blaster():
elif new_score == old_score: elif new_score == old_score:
# If both genes are of same coverage # If both genes are of same coverage
# and identity is the same implyed by new_score == old_score # and identity is the same implied by new_score == old_score
# hit is chosen based on length # hit is chosen based on length
if((int(best_hsp['perc_coverage']) == if((int(best_hsp['perc_coverage']) ==
int(hit_data['perc_coverage'])) int(hit_data['perc_coverage']))
......
...@@ -36,6 +36,11 @@ parser.add_argument("-t", "--threshold", dest="threshold", ...@@ -36,6 +36,11 @@ parser.add_argument("-t", "--threshold", dest="threshold",
help="Blast threshold for identity", help="Blast threshold for identity",
type=float, type=float,
default=0.90) default=0.90)
parser.add_argument("--overlap",
help=("Allow hits/genes to overlap with this number of "
"nucleotides. Default: 30."),
type=int,
default=30)
args = parser.parse_args() args = parser.parse_args()
...@@ -94,7 +99,8 @@ for inp_file in input_list: ...@@ -94,7 +99,8 @@ for inp_file in input_list:
# Calling blast and parsing output # Calling blast and parsing output
blast_run = Blaster(inputfile=inp_file, databases=databases, blast_run = Blaster(inputfile=inp_file, databases=databases,
db_path=db_path, out_path=out_path, min_cov=min_cov, db_path=db_path, out_path=out_path, min_cov=min_cov,
threshold=threshold, blast=blast) threshold=threshold, blast=blast,
allowed_overlap=args.overlap)
results = blast_run.results results = blast_run.results
query_align = blast_run.gene_align_query query_align = blast_run.gene_align_query
......
...@@ -32,9 +32,9 @@ class CGEFinder(): ...@@ -32,9 +32,9 @@ class CGEFinder():
def kma(inputfile_1, out_path, databases, db_path_kma, min_cov=0.6, def kma(inputfile_1, out_path, databases, db_path_kma, min_cov=0.6,
threshold=0.9, kma_path="cge/kma/kma", sample_name="", threshold=0.9, kma_path="cge/kma/kma", sample_name="",
inputfile_2=None, kma_mrs=None, kma_gapopen=None, inputfile_2=None, kma_mrs=None, kma_gapopen=None,
kma_gapextend=None, kma_penalty=None, kma_reward=None, kma_pm=None, kma_gapextend=None, kma_penalty=None, kma_reward=None,
kma_fpm=None, kma_memmode=False, kma_nanopore=False, debug=False, kma_apm=None, kma_memmode=False, kma_nanopore=False, debug=False,
kma_add_args=None): kma_add_args=None, kma_cge=False, kma_1t1=False):
""" """
I expect that there will only be one hit pr gene, but if there are I expect that there will only be one hit pr gene, but if there are
more, I assume that the sequence of the hits are the same in the res more, I assume that the sequence of the hits are the same in the res
...@@ -73,10 +73,12 @@ class CGEFinder(): ...@@ -73,10 +73,12 @@ class CGEFinder():
kma_cmd += " -penalty " + str(kma_penalty) kma_cmd += " -penalty " + str(kma_penalty)
if(kma_reward is not None): if(kma_reward is not None):
kma_cmd += " -reward " + str(kma_reward) kma_cmd += " -reward " + str(kma_reward)
if(kma_pm is not None): if(kma_apm is not None):
kma_cmd += " -pm " + kma_pm kma_cmd += " -apm " + kma_apm
if(kma_fpm is not None): if(kma_cge):
kma_cmd += " -fpm " + kma_fpm kma_cmd += " -cge "
if(kma_1t1):
kma_cmd += " -1t1 "
if (kma_memmode): if (kma_memmode):
kma_cmd += " -mem_mode " kma_cmd += " -mem_mode "
if (kma_nanopore): if (kma_nanopore):
......
...@@ -258,7 +258,7 @@ class Program: ...@@ -258,7 +258,7 @@ class Program:
'else echo "Done" >> {stderr}; fi\n').format( 'else echo "Done" >> {stderr}; fi\n').format(
stderr=self.stderr)) stderr=self.stderr))
f.write('exit $ec\n') f.write('exit $ec\n')
os.chmod(script, 0o744) os.chmod(script, 0o755)
if self.queue is not None: if self.queue is not None:
# Setup execution of shell script through TORQUE # Setup execution of shell script through TORQUE
......
python-cgecore (1.5.0-1) unstable; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
* Testsuite: autopkgtest-pkg-python
* Set upstream metadata fields: Name.
-- Andreas Tille <tille@debian.org> Thu, 01 Aug 2019 22:24:02 +0200
python-cgecore (1.4.4-1) unstable; urgency=medium python-cgecore (1.4.4-1) unstable; urgency=medium
* Initial release (Closes: #929496) * Initial release (Closes: #929496)
......
...@@ -2,13 +2,14 @@ Source: python-cgecore ...@@ -2,13 +2,14 @@ Source: python-cgecore
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org> Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Andreas Tille <tille@debian.org> Uploaders: Andreas Tille <tille@debian.org>
Section: python Section: python
Testsuite: autopkgtest-pkg-python
Priority: optional Priority: optional
Build-Depends: debhelper (>= 12~), Build-Depends: debhelper-compat (= 12),
dh-python, dh-python,
python3, python3,
python3-setuptools, python3-setuptools,
python3-biopython python3-biopython
Standards-Version: 4.3.0 Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/python-cgecore Vcs-Browser: https://salsa.debian.org/med-team/python-cgecore
Vcs-Git: https://salsa.debian.org/med-team/python-cgecore.git Vcs-Git: https://salsa.debian.org/med-team/python-cgecore.git
Homepage: https://bitbucket.org/genomicepidemiology/cge_core_module/ Homepage: https://bitbucket.org/genomicepidemiology/cge_core_module/
......