Skip to content
Commits on Source (4)
......@@ -2,6 +2,13 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
## [0.16.0] - 2016-05-09
### Fixed
- Outer gene in intron gene cases now designated as a ciLocus.
- ciLoci are now exempt from merging into miLoci.
- Fixes to how polycistrons and other edge cases are handled.
## [0.15.2] - 2016-03-17
### Fixed
......
v0.15.2 stable
v0.16.0 stable
......@@ -8,7 +8,7 @@
#!annotation-source NCBI Acromyrmex echinatior Annotation Release 100
NW_011626563.1 AEGeAn::LocusPocus locus 1190754 1200781 . . . child_gene=1;child_mRNA=2;riil=769;effective_length=10028;iLocus_type=siLocus
NW_011626563.1 AEGeAn::LocusPocus locus 1200782 1201550 . . . fg_orient=FR;effective_length=769;iLocus_type=iiLocus
NW_011626563.1 AEGeAn::LocusPocus locus 1201551 1566216 . . . effective_length=364666;iiLocus_exception=complex-overlap-3;liil=769;riil=8884;iLocus_type=siLocus;child_gene=1;child_mRNA=1
NW_011626563.1 AEGeAn::LocusPocus locus 1201551 1566216 . . . iLocus_type=ciLocus;effective_length=364666;iiLocus_exception=complex-overlap-3;liil=769;riil=8884;child_gene=1;child_mRNA=1
NW_011626563.1 AEGeAn::LocusPocus locus 1216473 1219475 . . . iiLocus_exception=intron-gene;liil=0;riil=0;iLocus_type=siLocus;child_gene=1;child_mRNA=2
NW_011626563.1 AEGeAn::LocusPocus locus 1537022 1540189 . . . liil=0;riil=0;iLocus_type=niLocus;child_gene=1;child_ncRNA=1
NW_011626563.1 AEGeAn::LocusPocus locus 1566217 1575100 . . . fg_orient=RR;effective_length=8884;iLocus_type=iiLocus
......
......@@ -6,5 +6,5 @@
#!genome-build-accession NCBI_Assembly:GCF_000220905.1
#!annotation-date 7 April 2015
#!annotation-source NCBI Megachile rotundata Annotation Release 101
NW_003797177.1 AEGeAn::LocusPocus locus 9652 19311 . . . effective_length=9660;iLocus_type=siLocus;child_gene=1;child_mRNA=1
NW_003797177.1 AEGeAn::LocusPocus locus 9652 19311 . . . iLocus_type=ciLocus;effective_length=9660;child_gene=1;child_mRNA=1
NW_003797177.1 AEGeAn::LocusPocus locus 11405 18146 . . . iiLocus_exception=intron-gene;iLocus_type=siLocus;child_gene=1;child_mRNA=4
......@@ -7,6 +7,6 @@
#!annotation-date 3 June 2014
#!annotation-source NCBI Nasonia vitripennis Annotation Release 101
NC_015867.2 AEGeAn::LocusPocus locus 370926 373152 . . . child_gene=1;child_mRNA=1;right_overlap=262;iiLocus_exception=delta-overlap-delta;riil=0;effective_length=1965;iLocus_type=siLocus
NC_015867.2 AEGeAn::LocusPocus locus 372891 380536 . . . effective_length=6758;iLocus_type=siLocus;child_gene=1;child_mRNA=1
NC_015867.2 AEGeAn::LocusPocus locus 372891 380536 . . . iLocus_type=ciLocus;effective_length=6758;child_gene=1;child_mRNA=1
NC_015867.2 AEGeAn::LocusPocus locus 374998 378903 . . . iiLocus_exception=intron-gene;iLocus_type=siLocus;child_gene=1;child_mRNA=1
NC_015867.2 AEGeAn::LocusPocus locus 379649 381835 . . . left_overlap=888;liil=0;child_gene=1;child_mRNA=1;effective_length=2187;iLocus_type=siLocus
......@@ -43,7 +43,7 @@ Executing command: bin/parseval --datashare=data/share/ --outformat=html --
</tbody>
</table>
<h2>Gene loci <span class="tooltip">[?]<span class="tooltip_text">If a gene annotation overlaps with another gene annotation, those annotations are associated with the same gene locus. See <a target="_blank" href="http://aegean.readthedocs.org/en/refactor/loci.html">this page</a> for a formal definition of a locus annotation.</span></span></h2>
<h2>Gene loci <span class="tooltip">[?]<span class="tooltip_text">If a gene annotation overlaps with another gene annotation, those annotations are associated with the same gene locus. See <a target="_blank" href="http://aegean.readthedocs.io/en/latest/loci.html">this page</a> for a formal definition of a locus annotation.</span></span></h2>
<table class="table_normal">
<tr><td>shared</td><td>7</td></tr>
<tr><td>unique to reference</td><td>1</td></tr>
......
......@@ -11,29 +11,67 @@ import re
import sys
class Locus(object):
    """A single tab-delimited ``locus`` record split into its nine columns.

    The record is kept as a list of column strings in ``fields``; the
    original, unmodified input line is retained in ``_rawdata`` so that it
    can be quoted in error messages.
    """

    def __init__(self, line):
        """Split ``line`` on tabs; exactly nine columns are required."""
        self._rawdata = line
        columns = line.strip().split('\t')
        self.fields = columns
        assert len(self.fields) == 9

    @property
    def seqid(self):
        """First column of the record."""
        return self.fields[0]

    @property
    def start(self):
        """Fourth column of the record, as an integer."""
        return int(self.fields[3])

    @property
    def end(self):
        """Fifth column of the record, as an integer."""
        return int(self.fields[4])

    @property
    def ilocus_class(self):
        """Value of the ``iLocus_type`` attribute in the ninth column.

        Fails an assertion (quoting the raw input line) when the attribute
        is absent.
        """
        attributes = self.fields[8]
        found = re.search('iLocus_type=([^;\n]+)', attributes)
        assert found, 'could not determine iLocus type: ' + self._rawdata
        return found.group(1)

    @property
    def mergeable(self):
        """Whether this record is eligible for merging.

        True only for siLocus/niLocus records that do not carry the
        ``iiLocus_exception=intron-gene`` attribute.
        """
        eligible_type = self.ilocus_class in ['siLocus', 'niLocus']
        return (eligible_type and
                'iiLocus_exception=intron-gene' not in self.fields[8])

    def __len__(self):
        """Inclusive length of the interval: ``end - start + 1``."""
        return 1 + self.end - self.start

    def __str__(self):
        """The record re-joined with tabs."""
        return '\t'.join(self.fields)

    def strip(self):
        """Remove ``ID=...`` and then ``Name=...`` from the attribute column.

        Modifies ``fields[8]`` in place; any semicolons immediately following
        a removed attribute are removed with it.
        """
        for pattern in ('ID=[^;\n]+;*', 'Name=[^;\n]+;*'):
            self.fields[8] = re.sub(pattern, '', self.fields[8])
def merge_iloci(loci):
"""Merge adjacent or overlapping gene-containing iLoci."""
assert len(loci) > 0
if len(loci) == 1:
line = re.sub('ID=[^;\n]+;*', '', loci[0])
line = re.sub('Name=[^;\n]+;*', '', line)
return line
loci[0].strip()
return loci[0]
seqid = None
start, end = -1, -1
attrs = {}
for locus in loci:
fields = locus.split('\t')
assert len(fields) == 9
if seqid:
assert fields[0] == seqid
seqid = fields[0]
lstart = int(fields[3])
lend = int(fields[4])
if start == -1 or lstart < start:
start = lstart
end = max(end, lend)
numeric_attrs = re.findall('([^;=]+=\d+)', fields[8])
assert locus.seqid == seqid
seqid = locus.seqid
if start == -1 or locus.start < start:
start = locus.start
end = max(end, locus.end)
numeric_attrs = re.findall('([^;=]+=\d+)', locus.fields[8])
for key_value_pair in numeric_attrs:
assert '=' in key_value_pair, \
'malformed key/value pair %s' % key_value_pair
......@@ -49,8 +87,9 @@ def merge_iloci(loci):
for key in sorted(attrs):
attrstring += ';%s=%d' % (key, attrs[key])
gff3 = [seqid, 'AEGeAn::miloci.py', 'locus', str(start), str(end),
'%d' % len(loci), '.', '.', attrstring]
return '\t'.join(gff3)
str(len(loci)), '.', '.', attrstring]
line = '\t'.join(gff3)
return Locus(line)
def parse_iloci(fp):
......@@ -59,34 +98,28 @@ def parse_iloci(fp):
Output: merged iLoci; gene-containing iLoci that are adjacent or
overlapping are combined
"""
seqid = None
prev_loci = []
locus_buffer = []
for line in fp:
line = line.rstrip()
if '\tlocus\t' not in line:
continue
locus = Locus(line)
if len(locus_buffer) > 0 and locus.seqid != locus_buffer[0].seqid:
yield merge_iloci(locus_buffer)
locus_buffer = []
locusseqid = re.match('([^\t]+)', line).group(1)
if seqid is None:
seqid = locusseqid
elif locusseqid != seqid:
if len(prev_loci) > 0:
yield merge_iloci(prev_loci)
prev_loci = []
seqid = locusseqid
if ';child_gene=' in line:
prev_loci.append(line)
if locus.mergeable:
locus_buffer.append(locus)
continue
else:
if len(prev_loci) > 0:
yield merge_iloci(prev_loci)
prev_loci = []
line = re.sub('ID=[^;\n]+;*', '', line)
line = re.sub('Name=[^;\n]+;*', '', line)
yield line
if len(prev_loci) > 0:
yield merge_iloci(prev_loci)
if len(locus_buffer) > 0:
yield merge_iloci(locus_buffer)
locus_buffer = []
locus.strip()
yield locus
if len(locus_buffer) > 0:
yield merge_iloci(locus_buffer)
if __name__ == '__main__':
......
aegean (0.16.0+dfsg-1) unstable; urgency=medium
* New upstream release.
* Update watchfile to reflect move to BrendelGroup org.
-- Sascha Steinbiss <satta@debian.org> Fri, 05 Oct 2018 18:03:42 +0200
aegean (0.15.2+dfsg-2) unstable; urgency=medium
[ Andreas Tille ]
......
version=3
opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
https://github.com/standage/AEGeAn/tags .*/v?(\d.*)\.(?:tgz|tbz2|txz|tar\.(?:gz|bz2|xz))
https://github.com/BrendelGroup/AEGeAn/tags .*/v?(\d.*)\.(?:tgz|tbz2|txz|tar\.(?:gz|bz2|xz))
......@@ -403,7 +403,7 @@ static void compare_report_html_footer(FILE *outstream)
fprintf(outstream,
" <p class=\"footer\">\n"
" Generated by <a href=\"%s\">AEGeAn %s (%s %s)</a>.<br />\n"
" Copyright © %s <a href=\"http://aegean.readthedocs.org/en/"
" Copyright © %s <a href=\"http://aegean.readthedocs.io/en/"
"latest/contrib.html\">AEGeAn authors</a>.<br />\n"
" See <a href=\"LICENSE\">LICENSE</a> for details."
" </p>\n", AGN_VERSION_LINK, AGN_SEMANTIC_VERSION,
......@@ -1127,7 +1127,7 @@ static void compare_report_html_summary_annot(AgnCompInfo *info,
" <h2>Gene loci <span class=\"tooltip\">[?]<span class=\"tooltip_text\">If a gene "
"annotation overlaps with another gene annotation, those annotations are associated "
"with the same gene locus. See <a target=\"_blank\" "
"href=\"http://aegean.readthedocs.org/en/refactor/loci.html\">"
"href=\"http://aegean.readthedocs.io/en/latest/loci.html\">"
"this page</a> for a formal definition of a locus annotation.</span></span></h2>\n"
" <table class=\"table_normal\">\n"
" <tr><td>shared</td><td>%lu</td></tr>\n"
......
......@@ -311,6 +311,8 @@ static bool refine_locus_check_intron_genes(AgnLocusRefineStream *stream,
GtStr *seqid = gt_genome_node_get_seqid(*gn1);
AgnLocus *locus = agn_locus_new(seqid);
agn_locus_add_feature(locus, fn1);
gt_feature_node_add_attribute((GtFeatureNode *)locus, "iLocus_type",
"ciLocus");
gt_genome_node_ref(*gn1);
gt_array_add(iloci, locus);
......@@ -408,11 +410,14 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
for(i = 0; i < numloci; i++)
{
GtGenomeNode **gn = gt_array_get(iloci, i);
GtFeatureNode *fn = gt_feature_node_cast(*gn);
if(i == 0)
coding_status = agn_locus_num_mrnas(*gn) > 0;
{
coding_status = agn_typecheck_count(fn, agn_typecheck_cds) > 0;
}
else
{
bool test_status = agn_locus_num_mrnas(*gn) > 0;
bool test_status = agn_typecheck_count(fn, agn_typecheck_cds) > 0;
same_coding_status = coding_status == test_status;
if(!same_coding_status)
break;
......@@ -467,8 +472,11 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
fprintf(stream->ilenfile, "%s\t0\n", gt_str_get(seqid));
}
}
if(gt_feature_node_get_attribute(fn, "iLocus_type") == NULL)
{
gt_feature_node_add_attribute(fn, "iLocus_type", typestr);
}
}
if(numloci == 1)
{
char lenstr[32];
......@@ -489,7 +497,7 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
GtFeatureNode *fn1 = gt_feature_node_cast(*gn1);
GtFeatureNode *fn2 = gt_feature_node_cast(*gn2);
bool cds1 = agn_locus_num_mrnas(*gn1) > 0;
bool cds1 = agn_typecheck_count(fn1, agn_typecheck_cds) > 0;;
if(cds1 == true)
{
gt_feature_node_add_attribute(fn1, "iLocus_type", "siLocus");
......@@ -624,9 +632,12 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
{
type = "ciLocus";
}
if(gt_feature_node_get_attribute(fn, "iLocus_type") == NULL)
{
gt_feature_node_add_attribute(fn, "iLocus_type", type);
}
}
}
return;
}
......
......@@ -269,6 +269,9 @@ bool agn_overlap_ilocus(GtGenomeNode *f1, GtGenomeNode *f2,
if(gt_str_cmp(seqid1, seqid2) != 0)
return false;
GtRange r1 = gt_genome_node_get_range(f1);
GtRange r2 = gt_genome_node_get_range(f2);
if(by_cds)
{
GtRange c1 = agn_feature_node_get_cds_range((GtFeatureNode *)f1);
......@@ -286,13 +289,18 @@ bool agn_overlap_ilocus(GtGenomeNode *f1, GtGenomeNode *f2,
{
// Both have coding sequences, use those instead of the complete feature
// coordinates.
if(gt_range_compare(&r1, &r2) == 0)
{
// Polycistrons belong together
return true;
}
return gt_range_overlap_delta(&c1, &c2, minoverlap);
}
}
// Either we are not in CDS mode, or the features don't have a CDS.
GtRange r1 = gt_genome_node_get_range(f1);
GtRange r2 = gt_genome_node_get_range(f2);
return gt_range_overlap_delta(&r1, &r2, minoverlap);
}
......