New upstream version 1.6.0+dfsg

parent 9d897ea2
This diff is collapsed.
......@@ -14,22 +14,27 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation successfully complete. #"
@echo "# Installation successfully completed. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D FREEBSD " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
propmapped: propmapped.c ${ALL_OBJECTS}
${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -14,20 +14,25 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
all: sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation successfully complete. #"
@echo "# Installation successfully completed. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo " " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -11,20 +11,24 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # globalReassembly testZlib
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # globalReassembly testZlib
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation successfully complete. #"
@echo "# Installation successfully completed. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D MACOS " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -314,10 +314,8 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.multi_best_reads=1;
global_context->config.reported_multi_best_reads = global_context->config.multi_best_reads;
global_context->config.max_vote_combinations = max(global_context->config.max_vote_combinations, global_context->config.reported_multi_best_reads + 1);
global_context->config.max_vote_simples = max(global_context->config.max_vote_simples, global_context->config.reported_multi_best_reads + 1);
global_context->config.report_multi_mapping_reads = 1;
break;
case 'H':
global_context->config.use_hamming_distance_break_ties = 1;
......@@ -576,6 +574,9 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
return -1;
}
if(global_context->config.reported_multi_best_reads > 1 && ! global_context->config.report_multi_mapping_reads)
SUBREADprintf("WARNING: You required multi best alignments, but disallowed multi-mapping reads. You need to turn on the multi-mapping option.\n");
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
if(global_context->config.more_accurate_fusions)
{
......
......@@ -465,7 +465,6 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.max_vote_combinations = max(global_context->config.max_vote_combinations, global_context->config.reported_multi_best_reads + 1);
global_context->config.max_vote_simples = max(global_context->config.max_vote_simples, global_context->config.reported_multi_best_reads + 1);
break;
case 'c':
global_context->config.space_type = GENE_SPACE_COLOR;
......@@ -618,6 +617,8 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
return -1;
}
if(global_context->config.reported_multi_best_reads > 1 && ! global_context->config.report_multi_mapping_reads)
SUBREADprintf("WARNING: You required multi best alignments, but disallowed multi-mapping reads. You need to turn on the multi-mapping option.\n");
if(global_context->config.is_SAM_file_input) global_context->config.phred_score_format = FASTQ_PHRED33;
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
......
......@@ -110,6 +110,41 @@ void debug_show_event(global_context_t* global_context, chromosome_event_t * eve
SUBREADprintf("Event between %s and %s\n", outpos1, outpos2);
}
int get_offset_maximum_chro_pos(global_context_t * global_context, thread_context_t * thread_context, unsigned int linear){
gene_offset_t * chros =& global_context -> chromosome_table;
int n = 0;
int total_offsets = chros -> total_offsets;
int LL = 0, RR = total_offsets-1;
while(1){
if(LL >= RR-1) break;
int MM = (LL+RR)/2;
if( linear > chros->read_offsets[MM]) LL = MM;
else if(linear < chros->read_offsets[MM]) RR = MM;
else break;
}
n = max(0, LL - 2);
for (; n < chros -> total_offsets; n++) {
if (chros->read_offsets[n] > linear) {
int ret;
unsigned int last_linear = 0;
if(n==0)
ret = chros->read_offsets[0] - chros -> padding *2 +16;
else{
ret = ( chros->read_offsets[n] - chros->read_offsets[n-1] ) - chros -> padding *2 +16;
last_linear = chros->read_offsets[n-1];
}
linear -= last_linear;
if(linear < chros -> padding || linear >= chros -> padding + ret) return -1;
return ret;
}
}
return -2;
}
// read_head_abs_pos is the offset of the FIRST WANTED base.
void search_events_to_front(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_head_abs_offset, short remainder_len, short sofar_matched, int suggested_movement, int do_not_jump)
......@@ -2547,7 +2582,7 @@ unsigned int explain_read(global_context_t * global_context, thread_context_t *
back_search_tail_position = current_result -> selected_position + back_search_read_tail + current_result -> indels_in_confident_coverage;
//if( back_search_read_tail > 102)
// SUBREADprintf("MAX back_search_read_tail : MIN %d , %d\n", explain_context.full_read_len , current_result -> confident_coverage_end);
//SUBREADprintf("MAX back_search_read_tail : MIN %d , %d\n", explain_context.full_read_len , current_result -> confident_coverage_end);
explain_context.tmp_search_junctions[0].read_pos_end = back_search_read_tail;
explain_context.tmp_search_junctions[0].abs_offset_for_start = back_search_tail_position;
......@@ -2762,8 +2797,11 @@ int find_soft_clipping(global_context_t * global_context, thread_context_t * th
// add the new base
char reference_base = gvindex_get(current_value_index, added_base_index + mapped_pos);
//SUBREADprintf("CHMAT [%s] ref:read = %c:%c\n", search_to_tail?"T":"H", reference_base, read_text[added_base_index]);
if(0){
char outpos1[100];
absoffset_to_posstr(global_context, added_base_index + mapped_pos, outpos1);
SUBREADprintf("CHMAT [%s] %s (%u) ref:read = %c:%c\n", search_to_tail?"T":"H" ,outpos1, added_base_index + mapped_pos, reference_base, read_text[added_base_index]);
}
int added_is_matched = (reference_base == read_text[added_base_index]);
matched_in_window += added_is_matched;
if(added_is_matched)
......@@ -2820,6 +2858,13 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
//SUBREADprintf("Coverage : %d ~ %d\n", covered_start, covered_end);
if(0){
char posout1[100];
int chro_max = get_offset_maximum_chro_pos(global_context,thread_context,read_head_abs_offset);
absoffset_to_posstr(global_context, read_head_abs_offset, posout1);
SUBREADprintf("READ %s : mapped to %s ; max_pos=%d\n", read_name, posout1, chro_max);
}
while(1)
{
char nch = cigar_string[cigar_cursor++];
......@@ -2841,6 +2886,18 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
int has_clipping_this_section_head = 0, has_clipping_this_section_tail = 0;
char * reversed_first_section_text = NULL;
if(0){
int is_head_in_chro = get_offset_maximum_chro_pos(global_context,thread_context, current_perfect_section_abs );
int is_end_in_chro = get_offset_maximum_chro_pos(global_context,thread_context, current_perfect_section_abs + tmp_int );
char posout1[100];
char posout2[100];
int chro_max = get_offset_maximum_chro_pos(global_context,thread_context, current_perfect_section_abs );
absoffset_to_posstr(global_context, current_perfect_section_abs, posout1);
absoffset_to_posstr(global_context, current_perfect_section_abs + tmp_int, posout2);
SUBREADprintf(" %dM SECTION : mapped to %s ~ %s ; max_pos=%d ; Hin=%d, Ein=%d\n", tmp_int, posout1, posout2, chro_max, is_head_in_chro, is_end_in_chro);
SUBREADprintf(" %dM SECTION : Hin=%d, Ein=%d\n", tmp_int, is_head_in_chro, is_end_in_chro);
}
// find "J" sections if it is the first M
if(is_First_M && global_context -> config.show_soft_cliping)
{
......@@ -2859,6 +2916,7 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
}
else
head_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text, current_perfect_section_abs, tmp_int, 0, adj_coverage_start);
//SUBREADprintf("SSHEAD:%d\n", head_soft_clipped);
if(head_soft_clipped == tmp_int){
(*full_section_clipped) = 1;
......@@ -2891,6 +2949,8 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
else
tail_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text + read_cursor, current_perfect_section_abs, tmp_int, 1, adj_coverage_end);
//SUBREADprintf("SSTAIL:%d\n", tail_soft_clipped);
if(tail_soft_clipped == tmp_int){
tail_soft_clipped = 0;
if(full_section_clipped)(*full_section_clipped) = 1;
......
......@@ -1191,11 +1191,13 @@ unsigned int move_to_read_head(unsigned int tailpos, char * cigar){
// This function returns 1 if the cut was added.
// It returns 0 if the head or tail cut is not able to be added (e.g., the cigar ends up like "50M4I10S" or "10S30N90M")
int add_head_tail_cut_softclipping(char * cigar, int rlen, int head_cut, int tail_cut){
int add_head_tail_cut_softclipping(global_context_t * global_context, unsigned int linear, char * cigar, int rlen, int head_cut, int tail_cut){
//SUBREADprintf("ADD_SOFT: %s , %d, %d\n", cigar,head_cut,tail_cut);
char cigar_added [CORE_MAX_CIGAR_STR_LEN];
int cigar_cursor = 0, read_cursor = 0, next_read_cursor = 0;
int tmpi = 0, nch, has_M = 0;
unsigned int linear_cursor = linear;
cigar_added[0]=0;
......@@ -1209,6 +1211,13 @@ int add_head_tail_cut_softclipping(char * cigar, int rlen, int head_cut, int tai
if('M' == nch || 'S' == nch || 'I' == nch)
next_read_cursor = read_cursor + tmpi;
if('M' == nch || 'D' == nch || 'S' == nch || 'N' == nch){
int is_start_in_chro, is_end_in_chro;
is_start_in_chro = get_offset_maximum_chro_pos(global_context,linear_cursor);
is_end_in_chro = get_offset_maximum_chro_pos(global_context,linear_cursor + tmpi);
linear_cursor += tmpi;
}
int head_S = 0, tail_S =0, remainder_tmpi = tmpi, is_skip = 0;
if(next_read_cursor <= head_cut) is_skip = 1;
......@@ -1322,26 +1331,12 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
{
int head_cut = 0 , tail_cut = 0;
if(0 && FIXLENstrcmp("V0112_0155:7:1302:9507:32993", read_name)==0){
char posout1[100];
absoffset_to_posstr(global_context, r->linear_position, posout1);
SUBREADprintf("PERR : CIGAR=%s, READLEN=%d, POS=%s\n", r->cigar , read_len, posout1);
}
if(locate_gene_position_max(r->linear_position,& global_context -> chromosome_table, &r-> chro , &r -> offset, &head_cut, &tail_cut, global_context->config.do_fusion_detection?read_len:current_result->chromosomal_length)) {
if(locate_gene_position_max(r->linear_position + r->soft_clipping_movements,& global_context -> chromosome_table, &r-> chro , &r -> offset, &head_cut, &tail_cut, global_context->config.do_fusion_detection?read_len:(current_result->chromosomal_length - r->soft_clipping_movements))) {
is_r_OK = 0;
} else {
if(0 && FIXLENstrcmp("V0112_0155:7:1302:9507:32993", read_name)==0){
char posout1[100];
absoffset_to_posstr(global_context, r->linear_position, posout1);
SUBREADprintf("CUTT : CIGAR=%s, READLEN=%d, CATS=%d %d\n", r->cigar , read_len, head_cut, tail_cut);
}
int is_added_OK = 1;
if(head_cut!=0 || tail_cut!=0)
is_added_OK = add_head_tail_cut_softclipping(r->cigar , read_len, head_cut, tail_cut);
is_added_OK = add_head_tail_cut_softclipping(global_context, r->linear_position, r->cigar , read_len, head_cut, tail_cut);
if(is_added_OK){
r -> offset++;
......@@ -1922,13 +1917,13 @@ void write_single_fragment(global_context_t * global_context, thread_context_t *
}
if(1)if(is_funky & FUNKY_FRAGMENT_BC){
//#warning "LOGIC WRONG: R1 AND R2 SHOULD BE DECIDED BY THEIR MAPPING POSITIONS"
bktable_append(&global_context -> funky_table_BC, rec1 -> chro, rec1 -> offset + rec1 -> soft_clipping_movements, NULL + (2*pair_number));
bktable_append(&global_context -> funky_table_BC, rec2 -> chro, rec2 -> offset + rec2 -> soft_clipping_movements, NULL + (2*pair_number+1));
bktable_append(&global_context -> funky_table_BC, rec1 -> chro, rec1 -> offset , NULL + (2*pair_number));
bktable_append(&global_context -> funky_table_BC, rec2 -> chro, rec2 -> offset , NULL + (2*pair_number+1));
}
if(1)if(is_funky & FUNKY_FRAGMENT_DE){
fraglist_append(&global_context -> funky_list_DE, pair_number);
bktable_append(&global_context -> funky_table_DE, rec1 -> chro, rec1 -> offset + rec1 -> soft_clipping_movements, NULL + (2*pair_number + (rec1 -> offset > rec2 -> offset ? 1:0)));
bktable_append(&global_context -> funky_table_DE, rec2 -> chro, rec2 -> offset + rec2 -> soft_clipping_movements, NULL + (2*pair_number + (rec1 -> offset < rec2 -> offset ? 1:0)));
bktable_append(&global_context -> funky_table_DE, rec1 -> chro, rec1 -> offset , NULL + (2*pair_number + (rec1 -> offset > rec2 -> offset ? 1:0)));
bktable_append(&global_context -> funky_table_DE, rec2 -> chro, rec2 -> offset , NULL + (2*pair_number + (rec1 -> offset < rec2 -> offset ? 1:0)));
}
}
......@@ -2120,7 +2115,11 @@ void write_single_fragment(global_context_t * global_context, thread_context_t *
if(is_R1_OK && is_R2_OK)
{
if( rec1->offset > rec2->offset) out_tlen1 = - out_tlen1;
else out_tlen2 = -out_tlen2;
else if(rec2->offset > rec1->offset) out_tlen2 = -out_tlen2;
else{
if( rec1 -> strand ) out_tlen1 = - out_tlen1;
else out_tlen2 = -out_tlen2;
}
}
if(0==current_location)
......@@ -2144,11 +2143,11 @@ void write_single_fragment(global_context_t * global_context, thread_context_t *
}
if(is_R1_OK){
out_offset1 = max(1, rec1->offset + rec1 -> soft_clipping_movements);
out_offset1 = max(1, rec1->offset);
out_mapping_quality1 = rec1->mapping_quality;
}
if(is_R2_OK){
out_offset2 = max(1, rec2->offset + rec2 -> soft_clipping_movements);
out_offset2 = max(1, rec2->offset);
out_mapping_quality2 = rec2->mapping_quality;
}
......@@ -3506,6 +3505,11 @@ int run_maybe_threads(global_context_t *global_context, int task)
void * thr_parameters [5];
int ret_value =0;
if(task == STEP_ITERATION_TWO){
global_context -> last_written_fragment_number = 0;
}
if(global_context->config.all_threads<2) {
thr_parameters[0] = global_context;
thr_parameters[1] = NULL;
......@@ -3528,8 +3532,6 @@ int run_maybe_threads(global_context_t *global_context, int task)
memset(thread_contexts, 0, sizeof(thread_context_t)*64);
global_context -> all_thread_contexts = thread_contexts;
if(task == STEP_ITERATION_TWO)
global_context -> last_written_fragment_number = 0;
for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++)
{
......@@ -3563,11 +3565,11 @@ int run_maybe_threads(global_context_t *global_context, int task)
global_context -> not_properly_pairs_only_one_end_mapped += thread_contexts[current_thread_no].not_properly_pairs_only_one_end_mapped;
global_context -> all_multimapping_reads += thread_contexts[current_thread_no].all_multimapping_reads;
global_context -> all_uniquely_mapped_reads += thread_contexts[current_thread_no].all_uniquely_mapped_reads;
}
ret_value += *(ret_values + current_thread_no);
if(ret_value)break;
}
for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++){
if(thread_contexts[current_thread_no].output_buffer_item > 0)
SUBREADprintf("ERROR: UNFINISHED OUTPUT!\n");
......@@ -3685,7 +3687,6 @@ int read_chunk_circles(global_context_t *global_context)
global_context -> timecost_load_index += period_load_index;
global_context -> current_value_index = global_context -> all_value_indexes + global_context->current_index_block_number;
if(global_context->current_index_block_number ==0 && global_context -> all_processed_reads==0)
global_context->align_start_time = miltime();
......
......@@ -444,13 +444,13 @@ int locate_gene_position_max(unsigned int linear, const gene_offset_t* offsets ,
int total_offsets = offsets -> total_offsets;
int GENE_LOCATE_JUMP = total_offsets/4;
int GENE_LOCATE_JUMP = total_offsets/3;
while (GENE_LOCATE_JUMP > 5)
while (GENE_LOCATE_JUMP >3)
{
while(n+GENE_LOCATE_JUMP < total_offsets && offsets->read_offsets[n+GENE_LOCATE_JUMP] <= linear)
n+=GENE_LOCATE_JUMP;
GENE_LOCATE_JUMP /=4;
GENE_LOCATE_JUMP /=3;
}
for (; offsets->read_offsets[n]; n++)
......@@ -462,18 +462,29 @@ int locate_gene_position_max(unsigned int linear, const gene_offset_t* offsets ,
//SUBREADprintf("max=%u <= lim=%u : ACCEPTED.\n", rl + linear , offsets->read_offsets[n] + 16);
// the end of the read should not excess the end of the chromosome
if(tail_cut_length == NULL){
if(rl + linear > offsets->read_offsets[n] + 15 - offsets -> padding) return 1;
} else {
(*tail_cut_length) = linear + rl - ( offsets->read_offsets[n] + 15 - offsets -> padding);
if( (*tail_cut_length) >= rl )return 1;
}
if (n==0)
*pos = linear;
else
*pos = linear - offsets->read_offsets[n-1];
if(tail_cut_length == NULL){
if(rl + linear > offsets->read_offsets[n] + 15 - offsets -> padding) return 1;
} else {
unsigned int posn1 = 0;
if(n>0) posn1 = offsets->read_offsets[n-1];
long long int tct = ( linear + rl - posn1 - offsets -> padding );
if(tct < rl)tct = rl;
long long int chro_leng = (offsets->read_offsets[n] - posn1 - 2*offsets -> padding + 16);
//SUBREADprintf("CHRO_LEN : %lld, READ_TAIL %lld , RL=%d\n", chro_leng, tct, rl);
tct -= chro_leng;
if( tct >= rl )return 1;
if( tct <0 )tct=0;
(*tail_cut_length) = tct;
}
if( (*pos) < offsets -> padding ) {
if(head_cut_length == NULL || (*pos) + rl <= offsets -> padding){
return 1;
......
......@@ -53,17 +53,56 @@ void gvindex_baseno2offset(unsigned int base_number, gene_value_index_t * index,
* offset_bit = base_number % 4 * 2;
}
int is_offset_in_chro(gene_value_index_t * offsets, gehash_data_t linear){
int ret = 1;
if(offsets -> appendix1 && offsets -> appendix2){
gene_offset_t * chros = offsets -> appendix1;
int padding = offsets -> appendix2 - NULL;
// SUBREADprintf( "OFFSETS:%d, PADD:%d\n", chros -> total_offsets, padding );
int n = 0;
int total_offsets = chros -> total_offsets;
int LL = 0, RR = total_offsets-1;
while(1){
if(LL >= RR-1) break;
int MM = (LL+RR)/2;
if( linear > chros->read_offsets[MM]) LL = MM;
else if(linear < chros->read_offsets[MM]) RR = MM;
else break;
}
n = max(0, LL - 2);
for (; n < chros -> total_offsets; n++) {
if (chros->read_offsets[n] > linear) {
unsigned int pos;
if (n==0)
pos = linear;
else
pos = linear - chros->read_offsets[n-1];
if( pos < chros -> padding || pos >= ( chros->read_offsets[n] - chros->read_offsets[n-1] - chros -> padding ))
ret = 0;
SUBREADprintf("INCHRO:%d ; POS:%d\n", ret, pos);
break;
}
}
}
return ret;
}
// return 'A', 'G', 'T' and 'C'
int gvindex_get(gene_value_index_t * index, gehash_data_t offset)
{
unsigned int offset_byte, offset_bit;
gvindex_baseno2offset_m(offset, index , offset_byte, offset_bit);
//if(!is_offset_in_chro( index, offset ))return -1;
gvindex_baseno2offset_m(offset, index , offset_byte, offset_bit);
if(offset_byte >= index-> values_bytes -1)return 'N';
unsigned int one_base_value = (index->values [offset_byte]) >> (offset_bit);
//SUBREADprintf("RECV_BASE=%d (%d - %d)\n",one_base_value & 3, offset_byte , offset_bit);
return int2base(one_base_value & 3);
......@@ -151,6 +190,7 @@ int gvindex_dump(gene_value_index_t * index, const char filename [])
int gvindex_load(gene_value_index_t * index, const char filename [])
{
memset(index,0, sizeof(gene_value_index_t));
FILE * fp = f_subr_open(filename, "rb");
int read_length;
read_length = fread(&index->start_point,4,1, fp);
......
This diff is collapsed.
/***************************************************************
The Subread software package is free software package:
you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
Subread is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
Authors: Drs Yang Liao and Wei Shi
***************************************************************/
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include "LRMconfig.h"
#include "LRMbase-index.h"
#include "LRMfile-io.h"
#define LRMgvindex_baseno2offset_m(base_number, index, offset_byte, offset_bit) {offset_byte = (base_number - index -> start_base_offset) >>2; offset_bit = base_number % 4 * 2;}
void LRMgvindex_baseno2offset(unsigned int base_number, LRMgene_value_index_t * index, unsigned int * offset_byte, unsigned int * offset_bit)
{
// the base number corrsponding to the 0-th bit in the whole value array;
unsigned int offset = (base_number - index -> start_base_offset);
* offset_byte = offset >>2 ;
* offset_bit = base_number % 4 * 2;
}
// return 'A', 'G', 'T' and 'C'
int LRMgvindex_get(LRMgene_value_index_t * index, LRMgehash_data_t offset)
{
unsigned int offset_byte, offset_bit;
LRMgvindex_baseno2offset_m(offset, index , offset_byte, offset_bit);
if(offset_byte >= index-> values_bytes -1)return 'N';
unsigned int one_base_value = (index->values [offset_byte]) >> (offset_bit);
//LRMprintf("RECV_BASE=%d (%d - %d)\n",one_base_value & 3, offset_byte , offset_bit);
return LRMint2base(one_base_value & 3);
}
int LRMgvindex_match(LRMgene_value_index_t * index, LRMgehash_data_t offset, LRMgehash_key_t base_values)
{
unsigned int offset_byte, offset_bit;
LRMgvindex_baseno2offset_m(offset, index , offset_byte, offset_bit);
int i, ret = 0;
for (i=0; i<16; i++)
{
unsigned char mask = 0x3 << (offset_bit);
unsigned char one_base_value = (index->values [offset_byte] & mask) >> (8-offset_bit);
if ( ((base_values >> (30 - i*2)) & 0x3) == one_base_value)
ret |= 1 << i;
offset_bit +=2;
if(offset_bit >=8)
{
offset_bit = 0;
offset_byte ++;
}
}
return ret;
}
int LRMgvindex_load(LRMgene_value_index_t * index, const char filename [])
{
FILE * fp = fopen(filename, "rb");
int read_length;
read_length = fread(&index->start_point,4,1, fp);
if(read_length<1){
LRMprintf("ERROR: the array index is incomplete : %d", read_length );
return 1;
}
read_length = fread(&index->length,4,1, fp);
if(read_length<1){
LRMprintf("Bad index\n");
return 1;
}
//LRMprintf ("\nBINDEX %s : %u ~ +%u\n",filename, index->start_point, index->length );
unsigned int useful_bytes, useful_bits;
index -> start_base_offset = index -> start_point - index -> start_point%4;
LRMgvindex_baseno2offset (index -> length+ index -> start_point, index ,&useful_bytes,&useful_bits);
index -> values = malloc(useful_bytes+1);
index -> values_bytes = useful_bytes+1;
if(!index->values)
{
LRMprintf("Out of memory\n");
return 1;
}
read_length =fread(index->values, 1, useful_bytes+1, fp);
if(read_length < useful_bytes){
LRMprintf("ERROR: the array index is incomplete : %d < %d.", read_length, useful_bytes+1 );
return 1;
}
fclose(fp);
return 0;
}
int gvindex_get(LRMgene_value_index_t * index, unsigned int offset){
unsigned int offset_byte, offset_bit;
LRMgvindex_baseno2offset_m(offset, index , offset_byte, offset_bit);
if(offset_byte >= index-> values_bytes -1)return 'N';
unsigned int one_base_value = (index->values [offset_byte]) >> (offset_bit);
return LRMint2base(one_base_value & 3);
}
void LRMgvindex_get_string(char *buf, LRMgene_value_index_t * index, unsigned int pos, int len, int is_negative_strand){
int i;
if (is_negative_strand)
for (i=len-1;i>=0;i--)
{
buf[i] = LRMgvindex_get (index, pos + len - 1 - i);
switch(buf[i])
{
case 'A': buf[i] = 'T'; break;
case 'G': buf[i] = 'C'; break;
case 'C': buf[i] = 'G'; break;
default: buf[i] = 'A';
}
}
else
for (i=0;i<len;i++)
buf[i] = LRMgvindex_get (index, pos +i);
}
int LRMvalidate_mapping(LRMcontext_t * context, char * read, char * cigar, LRMgene_value_index_t * index, unsigned int pos, int neg, int * maplen, int show_txt){
unsigned int chro_cursor = pos;
int read_chrsor = 0, all_matched = 0, all_mismatched = 0;
int tmpi = 0,cigar_i, nch, tmpi_sign = 1, x1;
if(neg) LRMreverse_read(read, strlen(read));
if(show_txt){
char postxt[100];
LRMpos2txt(context, chro_cursor, postxt);
LRMprintf("Starting Pos : Read + %d ( %s )\n", read_chrsor, postxt);
}
for(cigar_i = 0; (nch = cigar[cigar_i])!=0; cigar_i++){
if(nch == '-') tmpi_sign = -1;
else if(nch >='0' && nch <='9'){
tmpi = tmpi * 10 + (nch - '0');
}else{
tmpi *= tmpi_sign;
if(nch == 'M'){
int this_matched = LRMmatch_chro(read + read_chrsor, index, chro_cursor, tmpi, 0);
if(show_txt){
unsigned txt_chro_cursor = chro_cursor;
int txt_read_chrsor = read_chrsor;
for(x1 = 0; x1 < tmpi ; x1++){
int knownval = LRMgvindex_get(index, txt_chro_cursor);
int readval = read[txt_read_chrsor];
txt_chro_cursor ++;
txt_read_chrsor ++;
LRMprintf("%c[3%dm%c", 27, knownval == readval ? 7:1, readval);
}
}
if(0 && abs(tmpi) > 22 && this_matched < tmpi * 0.6){
LRMprintf("Too many mismatched (%d%c) : %d / %d : read + %d\n", tmpi, nch, this_matched, tmpi, read_chrsor);
}
all_matched += this_matched;
all_mismatched += ( tmpi - this_matched );
(*maplen) += tmpi;
}