...
 
Commits (4)
1.0.8: 12/04/18 O. Sallou
- bug fix when searching with errors: some branches in the tree were not analysed
1.0.7: 11/12/17 O. Sallou
- bug fix: the last character was not indexed
1.0.6: 26/10/17 O. Sallou
- Fix search with tree reduction, closes #1
1.0.5: 18/11/16 O. Sallou
- bug fix in search with reduction
1.0.4: 08/04/16 O. Sallou
......
cassiopee (1.0.8-1) unstable; urgency=medium
* New upstream release (bug fixes)
-- Olivier Sallou <osallou@debian.org> Thu, 12 Apr 2018 11:32:39 +0000
cassiopee (1.0.5-2) unstable; urgency=medium
* Team upload.
......
......@@ -4,4 +4,4 @@
#define use_openmp @USE_OPENMP@
#define SUFFIX_CHUNK_SIZE 10
\ No newline at end of file
#define SUFFIX_CHUNK_SIZE 10
......@@ -215,12 +215,11 @@ void CassieSearch::search(string suffix, bool clear) {
bool CassieSearch::searchAtreduction(const string suffix, const tree<TreeNode>::iterator sib, long counter, long tree_reducted_pos, int nbSubst, int nbIn, int nbDel, int nbN) {
//LOG(INFO) << "SEARCH AT REDUCTION";
char tree_char;
char suffix_char;
bool isequal = true;
while(counter < suffix.length() -1 && tree_reducted_pos < sib->next_length - 1 && isequal && sib->next_pos+tree_reducted_pos < this->indexer->seq_length - 1 ) {
// LOG(ERROR) << "loop in reduction";
counter++;
suffix_char = suffix[counter];
tree_reducted_pos++;
......@@ -233,9 +232,8 @@ bool CassieSearch::searchAtreduction(const string suffix, const tree<TreeNode>::
}
tree_char = this->indexer->getCharAtSuffix(sib->next_pos+tree_reducted_pos);
//LOG(INFO) << "match " << suffix_char << " with " << tree_char << " at " << tree_reducted_pos << ", max=" << sib->next_length;
bool isequal = this->isequal(tree_char,suffix_char);
// LOG(ERROR) << "match " << suffix_char << " with " << tree_char << " at " << tree_reducted_pos << ", max=" << sib->next_length;
isequal = this->isequal(tree_char,suffix_char);
if(!isequal) {
// If DNA/RNA and tree matches a N, check on max consecutive N allowed
if(this->mode!=2 && tree_char == 'n') {
......@@ -248,20 +246,29 @@ bool CassieSearch::searchAtreduction(const string suffix, const tree<TreeNode>::
nbN = 0;
}
}
if(!isequal && this->max_subst>0 && nbSubst < this->max_subst) {
// Check for substitutions
isequal = true;
nbSubst++;
// LOG(ERROR) << "not equal, can substitute? " << isequal << ", " << nbSubst << ", " << this->max_subst;
if(!isequal) {
if(this->max_subst>0 && nbSubst < this->max_subst) {
// Check for substitutions
isequal = true;
nbSubst++;
}
else {
// LOG(ERROR) << "now break equal=" << isequal;
break;
}
}
}
if(isequal) {
// LOG(ERROR) << "IS EQUAL? " << isequal;
if(counter == suffix.length() -1) {
this->getMatchesFromNode(sib, nbSubst, nbIn, nbDel);
return true;
}
else {
//LOG(INFO) << "full match but not complete, search childs";
//LOG(ERROR) << "full match but not complete, search childs";
// complete match but suffix not over, should look at childs now
this->searchAtNode(suffix, counter+1, sib, NULL, nbSubst, nbIn, nbDel, nbN);
}
......@@ -282,7 +289,7 @@ void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree
void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree<TreeNode>::iterator root, const tree<TreeNode>::iterator start_node,int nbSubst, int nbIn, int nbDel, int nbN) {
//LOG(INFO) << "searchAtNode" << suffix_pos << ", " << nbSubst;
// LOG(INFO) << "searchAtNode" << suffix_pos << ", " << nbSubst;
if(root!=NULL && root.number_of_children()==0) {
return;
......@@ -319,9 +326,8 @@ void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree
char tree_char = sib->c;
char suffix_char = suffix[counter];
while(sib != last_sibling && sib.node!=0) {
//while(sib != last_sibling && sib.node!=0) {
while(sib.node!=0) {
if(this->max_indel > 0 && nbIn+nbDel < this->max_indel) {
//LOG(INFO) << "Check for indel, cur= " << sib->c;
// Move on suffix, keep same base node
......@@ -351,6 +357,7 @@ void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree
bool isequal = this->isequal(tree_char,suffix_char);
tree<TreeNode>::iterator next_sibling = sib;
while(this->mode!=2) {
next_sibling = tr->next_sibling(next_sibling);
......@@ -399,31 +406,40 @@ void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree
// Exact match, no more char to parse
// Search leafs
this->getMatchesFromNode(sib, nbSubst, nbIn, nbDel);
if(this->max_subst>0 && nbSubst < this->max_subst) {
// If one last substitution is allowed, also check with remaining siblings
sib = tr->next_sibling(sib);
while(sib.node!=0) {
this->getMatchesFromNode(sib, nbSubst+1, nbIn, nbDel);
sib = tr->next_sibling(sib);
}
}
break;
}
else if(sib->next_pos>=0){
//LOG(ERROR) << "-- " << sib->next_pos << ", " << sib->next_length;
//LOG(INFO) << "next " << sib->next_pos << ", " << sib->next_length;
long tree_reducted_pos = -1;
// Fix O.Sallou 08/04/16 search error with reduction
counter++;
// Fix O.Sallou 26/10/17 search error with reduction
//counter++;
bool matched = this->searchAtreduction(suffix, sib, counter, tree_reducted_pos, nbSubst, nbIn, nbDel, nbN);
break;
}
else if(nb_childs > 0) {
this->searchAtNode(suffix, counter+1, sib, nbSubst, nbIn, nbDel, nbN);
sib = tr->next_sibling(sib);
if(sib.node != 0) {
tree_char = sib->c;
}
else {
tree_char = '\0';
}
/*
last_sibling = tr->end(sib);
parentnode = sib;
sib = tr->begin(sib);
......@@ -431,6 +447,8 @@ void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree
tree_char = sib->c;
counter++;
suffix_char = suffix[counter];
*/
}
else {
break;
......@@ -451,7 +469,6 @@ void CassieSearch::searchAtNode(string suffix, const long suffix_pos, const tree
// anyway, test siblings
sib = tr->next_sibling(sib);
if(sib.node != 0) {
tree_char = sib->c;
}
......@@ -515,7 +532,7 @@ char* CassieIndexer::loadSuffix(long pos) {
assert(pos < this->seq_length);
long suffix_len = min(this->MAX_SUFFIX,this->seq_length - pos - 1);
long suffix_len = min(this->MAX_SUFFIX,this->seq_length - pos);
//char* suffix = new char[suffix_len+1]();
delete[] this->suffix;
......@@ -621,7 +638,7 @@ void CassieIndexer::index() {
DLOG(INFO) << "Indexing " << this->filename ;
for (long i=0; i<this->seq_length-1; i++) {
for (long i=0; i<this->seq_length; i++) {
this->filltree(i);
}
......@@ -733,7 +750,7 @@ void CassieIndexer::fillTreeWithSuffix(long suffix_pos, long pos) {
char node_char = this->getCharAtSuffix(pos+suffix_pos);
//long suffix_len = this->seq_length - (pos+suffix_pos) -1;
// OSALLOU
long suffix_len = min(this->max_index_depth, this->seq_length - (pos+suffix_pos) -1);
long suffix_len = min(this->max_index_depth, this->seq_length - (pos+suffix_pos));
TreeNode* node = new TreeNode(node_char);
......@@ -766,7 +783,7 @@ void CassieIndexer::fillTreeWithSuffix(long suffix_pos, long pos) {
void CassieIndexer::fillTreeWithSuffix(tree<TreeNode>::iterator sib, long suffix_pos, long pos) {
//long suffix_len = this->seq_length - pos -1 ;
//OSALLOU
long suffix_len = min(this->max_index_depth, this->seq_length - pos -1);
long suffix_len = min(this->max_index_depth, this->seq_length - pos);
for(long i=suffix_pos;i<suffix_len;i++) {
//char node_char = suffix[i];
......@@ -815,7 +832,7 @@ void CassieIndexer::filltree(long pos) {
//long suffix_len = this->seq_length - pos - 1;
// OSALLOU
long suffix_len = min(this->max_index_depth, this->seq_length - pos - 1);
long suffix_len = min(this->max_index_depth, this->seq_length - pos);
//LOG(INFO) << "new suffix " << pos << " l= " << suffix_len;
......