Skip to content

Commits on Source 8

Metadata-Version: 1.0
Name: cgecore
Version: 1.4.4
Version: 1.5.0
Summary: Center for Genomic Epidemiology Core Module
Home-page: https://bitbucket.org/genomicepidemiology/cge_core_module
Author: Center for Genomic Epidemiology
......@@ -24,10 +24,12 @@ Description: # cge_core_module
# Install package locally
python2 setup.py install
python3 setup.py install
# Distribute to PyPi
python3 setup.py sdist bdist_wheel
twine upload dist/*
*deprecated:*
......
......@@ -16,10 +16,12 @@ https://pypi.org/project/cgecore/
# Install package locally
python2 setup.py install
python3 setup.py install
# Distribute to PyPi
python3 setup.py sdist bdist_wheel
twine upload dist/*
*deprecated:*
......
Metadata-Version: 1.0
Name: cgecore
Version: 1.4.4
Version: 1.5.0
Summary: Center for Genomic Epidemiology Core Module
Home-page: https://bitbucket.org/genomicepidemiology/cge_core_module
Author: Center for Genomic Epidemiology
......@@ -24,10 +24,12 @@ Description: # cge_core_module
# Install package locally
python2 setup.py install
python3 setup.py install
# Distribute to PyPi
python3 setup.py sdist bdist_wheel
twine upload dist/*
*deprecated:*
......
......@@ -13,7 +13,7 @@ from .argumentparsing import (check_file_type, get_arguments, get_string,
)
#####################
__version__ = "1.4.4"
__version__ = "1.5.0"
__all__ = [
"argumentparsing",
"cmdline",
......
......@@ -40,7 +40,8 @@ class Blaster():
os.chmod(tmp_out_path, 0o775)
# Running blast
if (os.path.isfile(out_file) and os.access(out_file, os.R_OK)
if (os.path.isfile(out_file)
and os.access(out_file, os.R_OK)
and reuse_results):
print("Found " + out_file + " skipping DB.")
out, err = (b'', b'')
......@@ -50,7 +51,8 @@ class Blaster():
" -dust 'no'" % (blast, db_file, inputfile,
out_file, threshold, max_target_seqs))
process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
process = subprocess.Popen(cmd, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = process.communicate()
......@@ -59,18 +61,18 @@ class Blaster():
# Test if output file exist
result_handle = open(out_file, "r")
except IOError:
sys.exit(("Error: BLAST did not run as expected. " +
"The expected output file, {}, did not exist.\n" +
"BLAST finished with the following response:" +
sys.exit(("Error: BLAST did not run as expected. "
"The expected output file, {}, did not exist.\n"
"BLAST finished with the following response:"
"\n{}\n{}").format(os.path.abspath(out_file),
out.decode("utf-8"),
err.decode("utf-8")))
# Test if blast output is empty
if os.stat(out_file).st_size == 0:
sys.exit(("Error: BLAST did not run as expected. " +
"The output file {} was empty.\n" +
"BLAST finished with the following response:" +
sys.exit(("Error: BLAST did not run as expected. "
"The output file {} was empty.\n"
"BLAST finished with the following response:"
"\n{}\n{}").format(os.path.abspath(out_file),
out.decode("utf-8"),
err.decode("utf-8")))
......@@ -96,12 +98,23 @@ class Blaster():
# Parsing over the hits and only keeping the best
for blast_record in blast_records:
# OLD CODE TO BE REMOVED
# query = blast_record.query
# blast_record.alignments.sort(key=lambda align: (
# -max((len(hsp.query)
# * (int(hsp.identities) / float(len(hsp.query)))
# for hsp in align.hsps)))
# )
# Sort BLAST alignments by the hsp in each alignment with the
# highest number of identical nucleotides (hsp.identities)
blast_record.alignments.sort(
key=lambda align: (max((int(hsp.identities)
for hsp in align.hsps))),
reverse=True)
query = blast_record.query
blast_record.alignments.sort(key=lambda align: (
-max((len(hsp.query)
* (int(hsp.identities) / float(len(hsp.query)))
for hsp in align.hsps)))
)
for alignment in blast_record.alignments:
# Setting the e-value as 1 and bit as 0 to get the best
......@@ -117,6 +130,8 @@ class Blaster():
tmp = alignment.title.split(" ")
sbjct_header = tmp[1]
# DEBUG
print("Found: {}".format(sbjct_header))
bit = hsp.bits
sbjct_length = alignment.length
sbjct_start = hsp.sbjct_start
......@@ -129,7 +144,8 @@ class Blaster():
query_start = hsp.query_start
query_end = hsp.query_end
HSP_length = len(query_string)
perc_ident = int(hsp.identities) / float(HSP_length) * 100
perc_ident = (int(hsp.identities)
/ float(HSP_length) * 100)
strand = 0
coverage = ((int(HSP_length) - int(gaps))
/ float(sbjct_length))
......@@ -139,9 +155,9 @@ class Blaster():
# cal_score is later used to select the best hit
cal_score = perc_ident * coverage
hit_id = "%s:%s..%s:%s:%f" % (contig_name, query_start,
query_end, sbjct_header,
cal_score)
hit_id = "%s:%s..%s:%s:%f" % (
contig_name, query_start, query_end,
sbjct_header, cal_score)
# If the hit is on the other strand
if sbjct_start > sbjct_end:
......@@ -149,13 +165,17 @@ class Blaster():
sbjct_start = sbjct_end
sbjct_end = tmp
query_string = self.reversecomplement(query_string)
query_string = self.reversecomplement(
query_string)
homo_string = homo_string[::-1]
sbjct_string = self.reversecomplement(sbjct_string)
sbjct_string = self.reversecomplement(
sbjct_string)
strand = 1
# Save hit
if (cut_off and perc_coverage > 20) or cut_off == False:
if((cut_off and perc_coverage > 20)
or cut_off is False):
best_hsp = {'evalue': hsp.expect,
'sbjct_header': sbjct_header,
'bit': bit,
......@@ -188,13 +208,18 @@ class Blaster():
tmp_results = gene_results
# Compare the hit results
save, gene_split, gene_results = self.compare_results(
save, best_hsp, tmp_results, tmp_gene_split,
save, gene_split, gene_results = (
self.compare_results(save, best_hsp,
tmp_results,
tmp_gene_split,
allowed_overlap)
)
# If the hit is not overlapping with other hit
# seqeunces it is kept
if save == 1:
# DEBUG
print("Saving: {}".format(hit_id))
gene_results[hit_id] = best_hsp
result_handle.close()
......@@ -344,14 +369,19 @@ class Blaster():
# sequnce but match different genes only the best hit is
# kept
if new_contig == old_contig:
print("Same contig: {} == {}".format(new_contig, old_contig))
print("\t{} vs {}".format(new_db_hit, old_db_hit))
# Check if saved hits overlaps with current hit
overlap_len = (((old_end_query - old_start_query)
hit_union_length = (max(old_end_query, new_end_query)
- min(old_start_query, new_start_query))
hit_lengths_sum = ((old_end_query - old_start_query)
+ (new_end_query - new_start_query))
- (max(old_end_query, new_end_query)
- min(old_start_query, new_start_query)))
overlap_len = (hit_lengths_sum - hit_union_length)
if overlap_len < allowed_overlap:
print("\tignore overlap ({}): {}".format(overlap_len, new_db_hit))
continue
print("\toverlap found ({}): {}".format(overlap_len, new_db_hit))
# If the two hits cover the exact same place on the
# contig only the percentage of identity is compared
......@@ -388,10 +418,9 @@ class Blaster():
break
# If new hit overlaps with the saved hit
elif((max(old_end_query, new_end_query)
- min(old_start_query, new_start_query))
<= ((old_end_query - old_start_query)
+ (new_end_query - new_start_query))):
elif(hit_union_length <= hit_lengths_sum):
print("\t{} <= {}".format(hit_union_length, hit_lengths_sum))
print("\t\tScores: {} cmp {}".format(new_score, old_score))
if new_score > old_score:
# Set to remove old gene
......@@ -404,7 +433,7 @@ class Blaster():
elif new_score == old_score:
# If both genes are of same coverage
# and identity is the same implyed by new_score == old_score
# and identity is the same implied by new_score == old_score
# hit is chosen based on length
if((int(best_hsp['perc_coverage']) ==
int(hit_data['perc_coverage']))
......
......@@ -36,6 +36,11 @@ parser.add_argument("-t", "--threshold", dest="threshold",
help="Blast threshold for identity",
type=float,
default=0.90)
parser.add_argument("--overlap",
help=("Allow hits/genes to overlap with this number of "
"nucleotides. Default: 30."),
type=int,
default=30)
args = parser.parse_args()
......@@ -94,7 +99,8 @@ for inp_file in input_list:
# Calling blast and parsing output
blast_run = Blaster(inputfile=inp_file, databases=databases,
db_path=db_path, out_path=out_path, min_cov=min_cov,
threshold=threshold, blast=blast)
threshold=threshold, blast=blast,
allowed_overlap=args.overlap)
results = blast_run.results
query_align = blast_run.gene_align_query
......
......@@ -32,9 +32,9 @@ class CGEFinder():
def kma(inputfile_1, out_path, databases, db_path_kma, min_cov=0.6,
threshold=0.9, kma_path="cge/kma/kma", sample_name="",
inputfile_2=None, kma_mrs=None, kma_gapopen=None,
kma_gapextend=None, kma_penalty=None, kma_reward=None, kma_pm=None,
kma_fpm=None, kma_memmode=False, kma_nanopore=False, debug=False,
kma_add_args=None):
kma_gapextend=None, kma_penalty=None, kma_reward=None,
kma_apm=None, kma_memmode=False, kma_nanopore=False, debug=False,
kma_add_args=None, kma_cge=False, kma_1t1=False):
"""
I expect that there will only be one hit pr gene, but if there are
more, I assume that the sequence of the hits are the same in the res
......@@ -73,10 +73,12 @@ class CGEFinder():
kma_cmd += " -penalty " + str(kma_penalty)
if(kma_reward is not None):
kma_cmd += " -reward " + str(kma_reward)
if(kma_pm is not None):
kma_cmd += " -pm " + kma_pm
if(kma_fpm is not None):
kma_cmd += " -fpm " + kma_fpm
if(kma_apm is not None):
kma_cmd += " -apm " + kma_apm
if(kma_cge):
kma_cmd += " -cge "
if(kma_1t1):
kma_cmd += " -1t1 "
if (kma_memmode):
kma_cmd += " -mem_mode "
if (kma_nanopore):
......
......@@ -258,7 +258,7 @@ class Program:
'else echo "Done" >> {stderr}; fi\n').format(
stderr=self.stderr))
f.write('exit $ec\n')
os.chmod(script, 0o744)
os.chmod(script, 0o755)
if self.queue is not None:
# Setup execution of shell script through TORQUE
......
python-cgecore (1.5.0-1) unstable; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
* Testsuite: autopkgtest-pkg-python
* Set upstream metadata fields: Name.
-- Andreas Tille <tille@debian.org> Thu, 01 Aug 2019 22:24:02 +0200
python-cgecore (1.4.4-1) unstable; urgency=medium
* Initial release (Closes: #929496)
......
......@@ -2,13 +2,14 @@ Source: python-cgecore
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Andreas Tille <tille@debian.org>
Section: python
Testsuite: autopkgtest-pkg-python
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
dh-python,
python3,
python3-setuptools,
python3-biopython
Standards-Version: 4.3.0
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/python-cgecore
Vcs-Git: https://salsa.debian.org/med-team/python-cgecore.git
Homepage: https://bitbucket.org/genomicepidemiology/cge_core_module/
......