Imported Upstream version 1.4.6-p3+dfsg

parent c790db43
......@@ -82,12 +82,16 @@ void warning_file_limit()
}
}
void print_in_box(int line_width, int is_boundary, int is_center, char * pattern,...)
void print_in_box(int line_width, int is_boundary, int options, char * pattern,...)
{
int put_color_for_colon, is_center;
va_list args;
va_start(args , pattern);
char is_R_linebreak=0, * content, *out_line_buff;
put_color_for_colon = (options & PRINT_BOX_NOCOLOR_FOR_COLON)?0:1;
is_center = (options & PRINT_BOX_CENTER)?1:0;
content= malloc(1000);
out_line_buff= malloc(1000);
out_line_buff[0]=0;
......@@ -224,7 +228,7 @@ void print_in_box(int line_width, int is_boundary, int is_center, char * pattern
break;
}
}
if(col1w>0 && col1w < content_len-1)
if(col1w>0 && col1w < content_len-1 && put_color_for_colon)
{
content[col1w+1]=0;
strcat(out_line_buff,content);
......@@ -255,6 +259,7 @@ void print_in_box(int line_width, int is_boundary, int is_center, char * pattern
int show_summary(global_context_t * global_context)
{
......
SUBREAD_VERSION="1.4.6-p2"
SUBREAD_VERSION="1.4.6-p3"
STATIC_MAKE=
#STATIC_MAKE= -static
......@@ -89,6 +89,8 @@ typedef struct
unsigned long long unassigned_duplicate;
} fc_read_counters;
typedef unsigned long long read_count_type_t;
typedef struct
{
unsigned short thread_id;
......@@ -98,7 +100,8 @@ typedef struct
unsigned long long int all_reads;
//unsigned short current_read_length1;
//unsigned short current_read_length2;
unsigned int * count_table;
read_count_type_t * count_table;
read_count_type_t unpaired_fragment_no;
unsigned int chunk_read_ptr;
pthread_t thread_object;
......@@ -147,7 +150,8 @@ typedef struct
typedef struct
{
int is_gene_level;
int is_paired_end_data;
int is_paired_end_input_file;
int is_paired_end_mode_assign;
int is_multi_overlap_allowed;
int is_strand_checked;
int is_both_end_required;
......@@ -157,12 +161,15 @@ typedef struct
int is_input_file_resort_needed;
int is_SAM_file;
int is_read_details_out;
int is_SEPEmix_warning_shown;
int is_unpaired_warning_shown;
int is_stake_warning_shown;
int is_split_alignments_only;
int is_duplicate_ignored;
int do_not_sort;
int reduce_5_3_ends_to_one;
int isCVersion;
int use_fraction_multi_mapping;
int min_mapping_quality_score;
int min_paired_end_distance;
......@@ -325,13 +332,20 @@ void print_FC_configuration(fc_thread_global_context_t * global_context, char *
print_in_box(80,0,0,"");
print_in_box(80,0,0," Threads : %d", global_context->thread_number);
print_in_box(80,0,0," Level : %s level", global_context->is_gene_level?"meta-feature":"feature");
print_in_box(80,0,0," Paired-end : %s", global_context->is_paired_end_data?"yes":"no");
print_in_box(80,0,0," Paired-end : %s", global_context->is_paired_end_mode_assign?"yes":"no");
if(global_context -> do_not_sort && global_context->is_paired_end_mode_assign)
{
print_in_box(80,0,0," Sorting PE Reads : never");
print_in_box(80,0,0,"");
}
print_in_box(80,0,0," Strand specific : %s", global_context->is_strand_checked?(global_context->is_strand_checked==1?"yes":"inversed"):"no");
char * multi_mapping_allow_mode = "not counted";
if(global_context->is_multi_mapping_allowed == ALLOW_PRIMARY_MAPPING)
multi_mapping_allow_mode = "primary only";
else if(global_context->is_multi_mapping_allowed == ALLOW_ALL_MULTI_MAPPING)
multi_mapping_allow_mode = "counted";
multi_mapping_allow_mode = global_context -> use_fraction_multi_mapping?"counted (as fractions)": "counted (as integer)";
print_in_box(80,0,0," Multimapping reads : %s", multi_mapping_allow_mode);
print_in_box(80,0,0,"Multi-overlapping reads : %s", global_context->is_multi_overlap_allowed?"counted":"not counted");
if(global_context -> is_split_alignments_only)
......@@ -345,7 +359,7 @@ void print_FC_configuration(fc_thread_global_context_t * global_context, char *
if(global_context -> is_duplicate_ignored)
print_in_box(80,0,0," Duplicated Reads : ignored");
if(global_context->is_paired_end_data)
if(global_context->is_paired_end_mode_assign)
{
print_in_box(80,0,0,"");
print_in_box(80,0,0," Chimeric reads : %s", global_context->is_chimertc_disallowed?"not counted":"counted");
......@@ -378,7 +392,7 @@ void print_FC_results(fc_thread_global_context_t * global_context)
if(0){
print_in_box(80,1,1,"Summary");
print_in_box(80,0,0,"");
if(global_context->is_paired_end_data)
if(global_context->is_paired_end_mode_assign)
print_in_box(80,0,0," All fragments : %llu", global_context -> all_reads);
else
print_in_box(80,0,0," All reads : %llu", global_context -> all_reads);
......@@ -388,7 +402,7 @@ void print_FC_results(fc_thread_global_context_t * global_context)
else
print_in_box(80,0,0," Features : %u", global_context -> exontable_exons);
if(global_context->is_paired_end_data)
if(global_context->is_paired_end_mode_assign)
print_in_box(80,0,0," Assigned fragments : %llu", global_context -> read_counters.assigned_reads);
else
print_in_box(80,0,0," Assigned reads : %llu", global_context -> read_counters.assigned_reads);
......@@ -983,6 +997,98 @@ int strcmp_slash(char * s1, char * s2)
return nch != *s2;
}
#define NH_FRACTION_INT 65536
unsigned int calc_fixed_fraction(int nh){
if(nh==1) return NH_FRACTION_INT;
else if(nh == 2) return NH_FRACTION_INT>>1;
else return NH_FRACTION_INT / nh;
}
int calc_float_fraction(read_count_type_t score, read_count_type_t * integer_count, double * float_count){
if(score % NH_FRACTION_INT == 0){
(*integer_count) = score / NH_FRACTION_INT;
return 0;
}else{
(*float_count) = score * 1./NH_FRACTION_INT;
return 1;
}
}
void print_read_wrapping(char * rl, int is_second){
int refill_spaces = 3;
int read_length = 0, x1 = 0, spaces=0;
for(x1 = 0; x1 < 3100; x1++){
if(rl[x1]==0 && rl[x1+1]==0)break;
if(rl[x1]=='0' || rl[x1]=='\t') spaces++;
read_length ++;
}
char *out_buf1 = malloc(read_length + spaces * refill_spaces + 1), out_buf2[100];
int ox=0;
for(x1 = 0; x1 < 3000; x1++){
if(rl[x1]=='\n' || (rl[x1]==0 && rl[x1+1]==0)){
out_buf1[ox]=0;
break;
} else if((rl[x1]==0 && rl[x1+1]!=0) || rl[x1] == '\t'){
int x2;
for(x2 = 0; x2 < refill_spaces ; x2++){
out_buf1[ox]=' ';
ox++;
}
} else {
out_buf1[ox]=rl[x1];
ox++;
}
}
out_buf1[ox] = 0;
x1=0;
while(1){
int x2;
for(x2 = 0; x2 < 67 ; x2 ++){
char nch = out_buf1[x1];
if(nch == 0) break;
out_buf2[x2] = nch;
x1++;
}
out_buf2[x2] = 0;
print_in_box(80,0,PRINT_BOX_NOCOLOR_FOR_COLON," %s", out_buf2);
if(out_buf1[x1] == 0)break;
}
free(out_buf1);
}
void report_unpair_warning(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, int * this_noproperly_paired_added){
//printf("WARN:%d [%d]\n", global_context->is_unpaired_warning_shown, thread_context -> thread_id);
if(!global_context->is_unpaired_warning_shown)
{
global_context->is_unpaired_warning_shown=1;
print_in_box(80,0,0," Found reads that are not properly paired.");
print_in_box(80,0,0," (missing mate or the mate is not the next read)");
if(global_context -> do_not_sort){
print_in_box(85,0,0," %c[31mHowever, the reads will not be re-ordered.", 27);
}else{
global_context->redo = 1;
}
print_in_box(80,0,0," Below are the two reads that are not properly paired:");
print_read_wrapping(thread_context -> line_buffer1,0);
print_read_wrapping(thread_context -> line_buffer2,1);
}
if(0==(*this_noproperly_paired_added))thread_context -> unpaired_fragment_no++;
(*this_noproperly_paired_added) = 1;
}
void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context)
{
......@@ -995,15 +1101,17 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
short * hits_total_length1 = thread_context -> hits_total_length1 , * hits_total_length2 = thread_context -> hits_total_length2;
int is_second_read;
int maximum_NH_value = 1;
int skipped_for_exonic = 0;
int first_read_quality_score = 0;
int this_noproperly_paired_added = 0;
thread_context->all_reads++;
//if(thread_context->all_reads>1000000) printf("TA=%llu\n%s\n",thread_context->all_reads, thread_context -> line_buffer1);
for(is_second_read = 0 ; is_second_read < 2; is_second_read++)
{
if(is_second_read && !global_context -> is_paired_end_data) break;
if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
char * line = is_second_read? thread_context -> line_buffer2:thread_context -> line_buffer1;
......@@ -1028,17 +1136,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
}
//printf("R1=%s; R2=%s\n",read_name,read_name1 );
if(strcmp_slash(read_name,read_name1)!=0)
{
//printf("WARN:%d [%d]\n", global_context->is_unpaired_warning_shown, thread_context -> thread_id);
if(!global_context->is_unpaired_warning_shown)
{
// printf("RV:%s,%s\n", read_name, read_name1);
global_context->is_unpaired_warning_shown=1;
global_context->redo = 1;
print_in_box(80,0,0," Found reads that are not properly paired.");
print_in_box(80,0,0," (missing mate or the mate is not the next read)");
}
}
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
}
else
......@@ -1052,18 +1150,31 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(is_second_read == 0)
{
//skip the read if unmapped (its mate will be skipped as well if paired-end)
if( ((!global_context -> is_paired_end_data) && (alignment_masks & SAM_FLAG_UNMAPPED) ) ||
((alignment_masks & SAM_FLAG_UNMAPPED) && (alignment_masks & SAM_FLAG_MATE_UNMATCHED) && global_context -> is_paired_end_data) ||
(((alignment_masks & SAM_FLAG_UNMAPPED) || (alignment_masks & SAM_FLAG_MATE_UNMATCHED)) && global_context -> is_paired_end_data && global_context -> is_both_end_required)
if( ((!global_context -> is_paired_end_mode_assign) && (alignment_masks & SAM_FLAG_UNMAPPED) ) ||
((alignment_masks & SAM_FLAG_UNMAPPED) && (alignment_masks & SAM_FLAG_MATE_UNMATCHED) && global_context -> is_paired_end_mode_assign) ||
(((alignment_masks & SAM_FLAG_UNMAPPED) || (alignment_masks & SAM_FLAG_MATE_UNMATCHED)) && global_context -> is_paired_end_mode_assign && global_context -> is_both_end_required)
){
thread_context->read_counters.unassigned_unmapped ++;
if(global_context -> SAM_output_fp)
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Unmapped\t*\t*\n", read_name);
if(global_context -> is_paired_end_mode_assign){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
return; // do nothing if a read is unmapped, or the first read in a pair of reads is unmapped.
}
}
if(global_context -> is_paired_end_mode_assign && (!global_context ->is_SEPEmix_warning_shown)){
if(((!global_context -> is_paired_end_input_file) && ( alignment_masks & SAM_FLAG_PAIRED_TASK )) || ((global_context -> is_paired_end_input_file) && 0 == ( alignment_masks & SAM_FLAG_PAIRED_TASK ))){
print_in_box(85,0,0," %c[31mBoth single-end and paired-end reads were found.", 27);
global_context ->is_SEPEmix_warning_shown = 1;
}
}
read_chr = strtok_r(NULL,"\t", &tmp_tok_ptr);
......@@ -1085,17 +1196,23 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
int mapping_qual =atoi(mapping_qual_str);
//printf("SECOND=%d; FIRST=%d; THIS=%d; Q=%d\n", is_second_read, first_read_quality_score, mapping_qual, );
if(( mapping_qual < global_context -> min_mapping_quality_score && ! global_context -> is_paired_end_data)||( is_second_read && max( first_read_quality_score, mapping_qual ) < global_context -> min_mapping_quality_score))
if(( mapping_qual < global_context -> min_mapping_quality_score && ! global_context -> is_paired_end_mode_assign)||( is_second_read && max( first_read_quality_score, mapping_qual ) < global_context -> min_mapping_quality_score))
{
thread_context->read_counters.unassigned_mappingquality ++;
if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
if(global_context -> SAM_output_fp)
{
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MappingQuality\t*\tMapping_Quality=%d,%d\n", read_name, first_read_quality_score, mapping_qual);
}
return;
}
if(is_second_read==0 && global_context -> is_paired_end_data)
if(is_second_read==0 && global_context -> is_paired_end_mode_assign)
{
first_read_quality_score = mapping_qual;
}
......@@ -1114,7 +1231,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
}
if(is_second_read == 0 && global_context -> is_paired_end_data &&
if(is_second_read == 0 && global_context -> is_paired_end_mode_assign &&
(global_context -> is_PE_distance_checked || global_context -> is_chimertc_disallowed)
)
{
......@@ -1138,6 +1255,13 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
{
if(global_context -> is_PE_distance_checked && ((fragment_length > global_context -> max_paired_end_distance) || (fragment_length < global_context -> min_paired_end_distance)))
{
if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
thread_context->read_counters.unassigned_fragmentlength ++;
if(global_context -> SAM_output_fp)
......@@ -1149,6 +1273,13 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
{
if(global_context -> is_chimertc_disallowed)
{
if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
thread_context->read_counters.unassigned_chimericreads ++;
if(global_context -> SAM_output_fp)
......@@ -1168,6 +1299,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
{
if(alignment_masks & SAM_FLAG_DUPLICATE)
{
if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
thread_context->read_counters.unassigned_duplicate ++;
if(global_context -> SAM_output_fp)
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Duplicate\t*\t*\n", read_name);
......@@ -1179,6 +1316,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(SAM_FLAG_UNMAPPED & alignment_masks) continue;
int NH_value = 1;
char * NH_pos = strstr(tmp_tok_ptr,"\tNH:i:");
if(NH_pos)
{
......@@ -1188,16 +1326,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(is_second_read && read_1_chr)
{
if((strcmp(read_1_chr, mate_chr)!=0 || mate_pos!=read_1_pos) && read_1_chr[0] != '*' && mate_chr[0]!='*')
{
// printf("RV:%s,%s %d,%d\n", read_1_chr, mate_chr, mate_pos, read_1_pos);
if(!global_context->is_unpaired_warning_shown)
{
global_context->is_unpaired_warning_shown=1;
global_context->redo = 1;
print_in_box(80,0,0," Found reads that are not properly paired.");
print_in_box(80,0,0," (missing mate or mate is not the next read)");
}
}
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
else
{
......@@ -1214,14 +1343,37 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(global_context -> SAM_output_fp)
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MultiMapping\t*\t*\n", read_name);
if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
return;
}
}
int nh_i, NHtmpi=0;
for(nh_i = 6; nh_i < 15; nh_i++){
char nch = NH_pos[nh_i];
if(isdigit(nch)){
NHtmpi = NHtmpi * 10 + (nch-'0');
}else{break; }
}
NH_value = NHtmpi;
}
maximum_NH_value = max(maximum_NH_value, NH_value);
// if a pair of reads have one secondary, the entire fragment is seen as secondary.
if((alignment_masks & SAM_FLAG_SECONDARY_MAPPING) && (global_context -> is_multi_mapping_allowed == ALLOW_PRIMARY_MAPPING))
{
if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
thread_context->read_counters.unassigned_secondary ++;
if(global_context -> SAM_output_fp)
......@@ -1411,8 +1563,14 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(global_context->is_split_alignments_only)
{
skipped_for_exonic ++;
if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_data) || (alignment_masks & 0x8))
if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_mode_assign) || (alignment_masks & 0x8))
{
if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
if(strcmp_slash(read_name,read_name2)!=0)
report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
}
if(global_context -> SAM_output_fp)
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Nonjunction\t*\t*\n", read_name);
......@@ -1434,7 +1592,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
// both meta-feature mode and feature mode use the same strategy.
// two ends in a fragment is considered individually; the overlapping bases are not added up.
int ends;
for(ends = 0; ends < global_context -> is_paired_end_data + 1; ends++)
for(ends = 0; ends < global_context -> is_paired_end_mode_assign + 1; ends++)
{
long * hits_indices = ends?hits_indices2:hits_indices1;
short * hits_total_length = ends?hits_total_length2:hits_total_length1;
......@@ -1479,10 +1637,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
}
}
int fixed_fractional_count = global_context -> use_fraction_multi_mapping ?calc_fixed_fraction(maximum_NH_value): NH_FRACTION_INT;
if(nhits2+nhits1==1)
{
long hit_exon_id = nhits2?hits_indices2[0]:hits_indices1[0];
thread_context->count_table[hit_exon_id]++;
thread_context->count_table[hit_exon_id] += fixed_fractional_count;
thread_context->nreads_mapped_to_exon++;
if(global_context -> SAM_output_fp)
{
......@@ -1495,7 +1655,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
else if(nhits2 == 1 && nhits1 == 1 && hits_indices2[0]==hits_indices1[0])
{
long hit_exon_id = hits_indices1[0];
thread_context->count_table[hit_exon_id]++;
thread_context->count_table[hit_exon_id] += fixed_fractional_count;
thread_context->nreads_mapped_to_exon++;
if(global_context -> SAM_output_fp)
{
......@@ -1510,7 +1670,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(nhits2+nhits1>=MAX_HIT_NUMBER)
{
print_in_box(80,0,0,"A %s overlapped with %d features.", global_context -> is_paired_end_data?"fragment":"read", nhits2+nhits1);
print_in_box(80,0,0,"A %s overlapped with %d features.", global_context -> is_paired_end_mode_assign?"fragment":"read", nhits2+nhits1);
print_in_box(80,0,0,"Please increase MAX_HIT_NUMBER in the program");
}
......@@ -1521,7 +1681,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
for(is_second_read = 0; is_second_read < 2; is_second_read++)
{
if(is_second_read && !global_context -> is_paired_end_data) break;
if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
long * hits_indices = is_second_read?hits_indices2:hits_indices1;
int nhits = is_second_read?nhits2:nhits1;
if (nhits<1) continue;
......@@ -1632,7 +1792,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(top_voters == 1 && (!global_context -> is_multi_overlap_allowed))
{
thread_context->count_table[top_voter_id]++;
thread_context->count_table[top_voter_id] += fixed_fractional_count;
thread_context->nreads_mapped_to_exon++;
if(global_context -> SAM_output_fp)
{
......@@ -1661,7 +1821,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
{
long tmp_voter_id = (global_context -> is_gene_level)?decision_table_exon_ids[xk1]:decision_table_ids[xk1];
//printf("TVID=%ld; HITID=%d\n", tmp_voter_id, xk1);
thread_context->count_table[tmp_voter_id]++;
thread_context->count_table[tmp_voter_id] += fixed_fractional_count;
if(global_context -> SAM_output_fp)
{
......@@ -1743,7 +1903,7 @@ void * feature_count_worker(void * vargs)
//if(buffer_read_ptr>= global_context->input_buffer_max_size)
// if(buffer_read_ptr>6*1024*1024) printf("REALLY BIG PTR:%u = %u + %u - %u\n", buffer_read_ptr, thread_context->input_buffer_write_ptr , global_context->input_buffer_max_size, thread_context->input_buffer_remainder);
for(is_second_read = 0; is_second_read < (global_context->is_paired_end_data ? 2:1); is_second_read++)
for(is_second_read = 0; is_second_read < (global_context->is_paired_end_mode_assign ? 2:1); is_second_read++)
{
char * curr_line_buff = is_second_read?thread_context -> line_buffer2:thread_context -> line_buffer1;
//printf("R=%u; WPTR=%u ;RPTR=%u\n", thread_context->input_buffer_remainder, thread_context->input_buffer_write_ptr, buffer_read_ptr);
......@@ -1760,6 +1920,7 @@ void * feature_count_worker(void * vargs)
buffer_read_ptr = 0;
if(nch=='\n' || buffer_read_bytes>2998){
curr_line_buff[buffer_read_bytes+1]=0;
curr_line_buff[buffer_read_bytes+2]=0;
break;
}
}
......@@ -1861,7 +2022,7 @@ void * feature_count_worker(void * vargs)
while(PDATA_ptr < PDATA_len)
{
int is_second_read;
for(is_second_read = 0; is_second_read <= global_context -> is_paired_end_data; is_second_read++)
for(is_second_read = 0; is_second_read <= global_context -> is_paired_end_mode_assign; is_second_read++)
{
int binary_read_len, local_PDATA_ptr = PDATA_ptr;
char * curr_line_buff = is_second_read?thread_context -> line_buffer2:thread_context -> line_buffer1;
......@@ -1883,11 +2044,12 @@ void * feature_count_worker(void * vargs)
}
}
void fc_thread_merge_results(fc_thread_global_context_t * global_context, unsigned int * nreads , unsigned long long int *nreads_mapped_to_exon, fc_read_counters * my_read_counter)
void fc_thread_merge_results(fc_thread_global_context_t * global_context, read_count_type_t * nreads , unsigned long long int *nreads_mapped_to_exon, fc_read_counters * my_read_counter)
{
int xk1, xk2;
long long int total_input_reads = 0 ;
read_count_type_t unpaired_fragment_no = 0;
(*nreads_mapped_to_exon)=0;
......@@ -1899,6 +2061,7 @@ void fc_thread_merge_results(fc_thread_global_context_t * global_context, unsign
}
total_input_reads += global_context -> thread_contexts[xk1].all_reads;
(*nreads_mapped_to_exon) += global_context -> thread_contexts[xk1].nreads_mapped_to_exon;
unpaired_fragment_no += global_context -> thread_contexts[xk1].unpaired_fragment_no;
global_context -> read_counters.unassigned_ambiguous += global_context -> thread_contexts[xk1].read_counters.unassigned_ambiguous;
global_context -> read_counters.unassigned_nofeatures += global_context -> thread_contexts[xk1].read_counters.unassigned_nofeatures;
......@@ -1931,8 +2094,11 @@ void fc_thread_merge_results(fc_thread_global_context_t * global_context, unsign
sprintf(pct_str,"(%.1f%%%%)", (*nreads_mapped_to_exon)*100./total_input_reads);
else pct_str[0]=0;
print_in_box(80,0,0," Total %s : %llu", global_context -> is_paired_end_data?"fragments":"reads", total_input_reads);
print_in_box(pct_str[0]?81:80,0,0," Successfully assigned %s : %llu %s", global_context -> is_paired_end_data?"fragments":"reads", *nreads_mapped_to_exon,pct_str);
if(unpaired_fragment_no){
print_in_box(80,0,0," Not properly paired fragments : %llu", unpaired_fragment_no);
}
print_in_box(80,0,0," Total %s : %llu", global_context -> is_paired_end_mode_assign?"fragments":"reads", total_input_reads);
print_in_box(pct_str[0]?81:80,0,0," Successfully assigned %s : %llu %s", global_context -> is_paired_end_mode_assign?"fragments":"reads", *nreads_mapped_to_exon,pct_str);
print_in_box(80,0,0," Running time : %.2f minutes", (miltime() - global_context -> start_time)/60);
print_in_box(80,0,0,"");
}
......@@ -1977,7 +2143,7 @@ HashTable * load_alias_table(char * fname)
return ret;
}
void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int is_SAM, char * alias_file_name, char * cmd_rebuilt, int is_input_file_resort_needed, int feature_block_size, int isCVersion, int fiveEndExtension, int threeEndExtension, int minReadOverlap, int is_split_alignments_only, int reduce_5_3_ends_to_one, char * debug_command, int is_duplicate_ignored)
void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int is_SAM, char * alias_file_name, char * cmd_rebuilt, int is_input_file_resort_needed, int feature_block_size, int isCVersion, int fiveEndExtension, int threeEndExtension, int minReadOverlap, int is_split_alignments_only, int reduce_5_3_ends_to_one, char * debug_command, int is_duplicate_ignored, int is_not_sort, int use_fraction_multimapping)
{
global_context -> input_buffer_max_size = buffer_size;
......@@ -1989,7 +2155,7 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
global_context -> isCVersion = isCVersion;
global_context -> is_read_details_out = is_sam_out;
global_context -> is_multi_overlap_allowed = is_overlap_allowed;
global_context -> is_paired_end_data = is_PE_data;
global_context -> is_paired_end_mode_assign = is_PE_data;
global_context -> is_gene_level = is_gene_level;
global_context -> is_strand_checked = is_strand_checked;
global_context -> is_both_end_required = is_both_end_required;
......@@ -1999,8 +2165,9 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
global_context -> is_split_alignments_only = is_split_alignments_only;
global_context -> is_duplicate_ignored = is_duplicate_ignored;
global_context -> reduce_5_3_ends_to_one = reduce_5_3_ends_to_one;
global_context -> do_not_sort = is_not_sort;
global_context -> is_SAM_file = is_SAM;
global_context -> use_fraction_multi_mapping = use_fraction_multimapping;
global_context -> thread_number = threads;
global_context -> min_mapping_quality_score = min_map_qual_score;
......@@ -2092,7 +2259,7 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
global_context -> thread_contexts[xk1].input_buffer = malloc(global_context -> input_buffer_max_size);
global_context -> thread_contexts[xk1].thread_id = xk1;
global_context -> thread_contexts[xk1].chunk_read_ptr = 0;
global_context -> thread_contexts[xk1].count_table = calloc(sizeof(unsigned int), et_exons);
global_context -> thread_contexts[xk1].count_table = calloc(sizeof(read_count_type_t), et_exons);
global_context -> thread_contexts[xk1].nreads_mapped_to_exon = 0;
global_context -> thread_contexts[xk1].all_reads = 0;
global_context -> thread_contexts[xk1].line_buffer1 = malloc(global_context -> line_length + 2);
......@@ -2100,6 +2267,7 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
global_context -> thread_contexts[xk1].chro_name_buff = malloc(CHROMOSOME_NAME_LENGTH);
global_context -> thread_contexts[xk1].strm_buffer = malloc(sizeof(z_stream));
global_context -> thread_contexts[xk1].unpaired_fragment_no = 0;
global_context -> thread_contexts[xk1].read_counters.assigned_reads = 0;
global_context -> thread_contexts[xk1].read_counters.unassigned_ambiguous = 0;
global_context -> thread_contexts[xk1].read_counters.unassigned_nofeatures = 0;
......@@ -2205,11 +2373,21 @@ int resort_input_file(fc_thread_global_context_t * global_context)
}
void fc_write_final_gene_results(fc_thread_global_context_t * global_context, int * et_geneid, char ** et_chr, long * et_start, long * et_stop, unsigned char * et_strand, const char * out_file, int features, unsigned int ** column_numbers, char * file_list, int n_input_files, fc_feature_info_t * loaded_features, int header_out)
void BUFstrcat(char * targ, char * src, char ** buf){
int srclen = strlen(src);
if( (*buf) == NULL){
(*buf) = targ;
}
memcpy((*buf), src, srclen);
(*buf) += srclen;
(**buf) = 0;
}
void fc_write_final_gene_results(fc_thread_global_context_t * global_context, int * et_geneid, char ** et_chr, long * et_start, long * et_stop, unsigned char * et_strand, const char * out_file, int features, read_count_type_t ** column_numbers, char * file_list, int n_input_files, fc_feature_info_t * loaded_features, int header_out)
{
int xk1;
int genes = global_context -> gene_name_table -> numOfElements;
unsigned int *gene_columns;
read_count_type_t *gene_columns;
FILE * fp_out = f_subr_open(out_file,"w");
if(!fp_out){
......@@ -2241,7 +2419,7 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
}
fprintf(fp_out,"\n");
gene_columns = calloc(sizeof(unsigned int) , genes * non_empty_files);
gene_columns = calloc(sizeof(read_count_type_t) , genes * non_empty_files);
unsigned int * gene_exons_number = calloc(sizeof(unsigned int) , genes);
unsigned int * gene_exons_pointer = calloc(sizeof(unsigned int) , genes);
unsigned int * gene_exons_start = malloc(sizeof(unsigned int) * features);
......@@ -2294,16 +2472,20 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
unsigned int * input_start_stop_list = malloc(longest_gene_exons * sizeof(int) * 2);
unsigned int * output_start_stop_list = malloc(longest_gene_exons * sizeof(int) * 2);
char * out_chr_list = malloc(longest_gene_exons * (1+global_context -> longest_chro_name) + 1);
char * out_start_list = malloc(11 * longest_gene_exons + 1);
char * out_end_list = malloc(11 * longest_gene_exons + 1);
char * out_strand_list = malloc(2 * longest_gene_exons + 1);
char * out_chr_list = malloc(longest_gene_exons * (1+global_context -> longest_chro_name) + 1), * tmp_chr_list = NULL;
char * out_start_list = malloc(11 * longest_gene_exons + 1), * tmp_start_list = NULL;
char * out_end_list = malloc(11 * longest_gene_exons + 1), * tmp_end_list = NULL;
char * out_strand_list = malloc(2 * longest_gene_exons + 1), * tmp_strand_list = NULL;
for(xk1 = 0 ; xk1 < genes; xk1++)
{
int xk2;
memset(is_occupied,0,gene_exons_pointer[xk1]);
tmp_chr_list = NULL;
tmp_start_list = NULL;
tmp_end_list = NULL;
tmp_strand_list = NULL;
out_chr_list[0]=0;
out_start_list[0]=0;
out_end_list[0]=0;
......@@ -2340,15 +2522,15 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
for(xk3=0; xk3<merged_gaps; xk3++)
{
char numbbuf[12];
strcat(out_chr_list, matched_chr);
strcat(out_chr_list, ";");
BUFstrcat(out_chr_list, matched_chr, &tmp_chr_list);
BUFstrcat(out_chr_list, ";", &tmp_chr_list);
sprintf(numbbuf,"%u;", output_start_stop_list[xk3 * 2]);
strcat(out_start_list, numbbuf);
BUFstrcat(out_start_list, numbbuf, &tmp_start_list);
sprintf(numbbuf,"%u;", output_start_stop_list[xk3 * 2 + 1] - 1);
strcat(out_end_list, numbbuf);
BUFstrcat(out_end_list, numbbuf, &tmp_end_list);
sprintf(numbbuf,"%c;", matched_strand?'-':'+');
strcat(out_strand_list, numbbuf);
BUFstrcat(out_strand_list, numbbuf, &tmp_strand_list);
gene_nonoverlap_len += output_start_stop_list[xk3 * 2 + 1] - output_start_stop_list[xk3 * 2];
}
......@@ -2372,7 +2554,14 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
{
if(column_numbers[i_files])
{
fprintf(fp_out,"\t%u", gene_columns[i_files+non_empty_files*xk1]);
read_count_type_t longlong_res = 0;
double double_res = 0;
int is_double_number = calc_float_fraction(gene_columns[i_files+non_empty_files*xk1], &longlong_res, &double_res);
if(is_double_number){
fprintf(fp_out,"\t%.2f", double_res);
}else{
fprintf(fp_out,"\t%llu", longlong_res);
}
}
}
fprintf(fp_out,"\n");
......@@ -2396,7 +2585,7 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in