Imported Upstream version 1.5.1+dfsg

parent 5c4e244d
This diff is collapsed.
This diff is collapsed.
all: all:
@echo @echo
@echo " Subread currently supports Linux, Mac OS X, FreeBSD and Solaris. Please choose the correct Makefile to build Subread." @echo " Subread currently supports Linux, Mac OS X and FreeBSD. Please choose the correct Makefile to build Subread."
@echo @echo
@echo " For building subread in Linux, please run ' make -f Makefile.Linux '." @echo " For building subread in Linux, please run ' make -f Makefile.Linux '."
@echo " For building subread in Mac OS X, please run ' make -f Makefile.MacOS '." @echo " For building subread in Mac OS X, please run ' make -f Makefile.MacOS '."
......
...@@ -21,7 +21,7 @@ all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align ...@@ -21,7 +21,7 @@ all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align
@echo @echo
@echo "###########################################################" @echo "###########################################################"
@echo "# #" @echo "# #"
@echo "# Installation finished. #" @echo "# Installation complete. #"
@echo "# #" @echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #" @echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #" @echo "# #"
......
...@@ -20,7 +20,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea ...@@ -20,7 +20,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
@echo @echo
@echo "###########################################################" @echo "###########################################################"
@echo "# #" @echo "# #"
@echo "# Installation finished. #" @echo "# Installation complete. #"
@echo "# #" @echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #" @echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #" @echo "# #"
......
...@@ -18,7 +18,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea ...@@ -18,7 +18,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
@echo @echo
@echo "###########################################################" @echo "###########################################################"
@echo "# #" @echo "# #"
@echo "# Installation finished. #" @echo "# Installation complete. #"
@echo "# #" @echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #" @echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #" @echo "# #"
......
...@@ -156,7 +156,8 @@ double fisher_exact_test(int a, int b, int c, int d) ...@@ -156,7 +156,8 @@ double fisher_exact_test(int a, int b, int c, int d)
if(1){ if(1){
double ret = fast_fisher_test_one_side(a,b,c,d, precalculated_factorial, PRECALCULATE_FACTORIAL); double ret = fast_fisher_test_one_side(a,b,c,d, precalculated_factorial, PRECALCULATE_FACTORIAL);
//SUBREADprintf("FISHER_RES %d %d %d %d %.9f %.9f\n", a,b,c,d, ret, log(ret)); //#warning ">>>>>> FOR ACCURACY MEASUREMENT ONLY <<<<<<<<"
//SUBREADprintf("FISHER_RES %d %d %d %d %.19G %.19G\n", a,b,c,d, ret, log(ret));
return ret; return ret;
}else{ }else{
int AA=a, BB=b, CC=c, DD=d; int AA=a, BB=b, CC=c, DD=d;
...@@ -1735,6 +1736,11 @@ int main_snp_calling_test(int argc,char ** argv) ...@@ -1735,6 +1736,11 @@ int main_snp_calling_test(int argc,char ** argv)
} }
} }
if(argc > optind){
SUBREADprintf("Invalid parameter '%s'\n", argv[optind]);
return -1;
}
if(out_BED_file[0]==0 || in_FASTA_file[0]==0 || (parameters.pile_file_name [0] == 0 && in_SAM_file[0]==0)) if(out_BED_file[0]==0 || in_FASTA_file[0]==0 || (parameters.pile_file_name [0] == 0 && in_SAM_file[0]==0))
{ {
SUBREADprintf("The names of the input file, the output file and the reference sequence file must be specified using -i, -o and -g options.\n"); SUBREADprintf("The names of the input file, the output file and the reference sequence file must be specified using -i, -o and -g options.\n");
......
...@@ -1447,10 +1447,11 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread ...@@ -1447,10 +1447,11 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
if(first_correct_base < last_correct_base || first_correct_base > last_correct_base + 3000) if(first_correct_base < last_correct_base || first_correct_base > last_correct_base + 3000)
SUBREADprintf("WRONG ORDER: F=%u, L=%d\n", first_correct_base , last_correct_base); SUBREADprintf("WRONG ORDER: F=%u, L=%d\n", first_correct_base , last_correct_base);
//int last_second_part_base = find_subread_end(read_len, global_context->config.total_subreads , next_correct_subread_last) ;
if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0) //#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541", read_name) == 0)
//if(current_result->selected_position > 433897 - 100 && current_result->selected_position < 433897) //if(current_result->selected_position > 433897 - 100 && current_result->selected_position < 433897)
SUBREADprintf("INDEL_P03: I=%d; INDELS=%d; POS=%u; COVER=%d -- %d\n", i, indels, current_result->selected_position, last_correct_subread, next_correct_subread); SUBREADprintf("INDEL_P03: I=%d; INDELS=%d; POS=%u; COVER=%d -- %d (vote_no : %d - %d)\n", i, indels, current_result->selected_position, last_correct_base, first_correct_base, last_correct_subread, next_correct_subread);
if(global_context -> config.use_dynamic_programming_indel || read_len > EXON_LONG_READ_LENGTH) if(global_context -> config.use_dynamic_programming_indel || read_len > EXON_LONG_READ_LENGTH)
{ {
...@@ -1467,7 +1468,8 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread ...@@ -1467,7 +1468,8 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
movement_buffer[dyna_steps]=0; movement_buffer[dyna_steps]=0;
if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0) //#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541", read_name) == 0)
{ {
SUBREADprintf("IR= %d %d~%d\n", dyna_steps, last_correct_base, first_correct_base); SUBREADprintf("IR= %d %d~%d\n", dyna_steps, last_correct_base, first_correct_base);
...@@ -1518,20 +1520,26 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread ...@@ -1518,20 +1520,26 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
// let's test if it is ambiguous: // let's test if it is ambiguous:
gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index; gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
int ambiguous_i, ambiguous_count=0;
int best_matched_bases = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) + //#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
match_chro(read_text + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type); if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541",read_name ) == 0){
for(ambiguous_i=-5; ambiguous_i<=5; ambiguous_i++) SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%ul READ_CUR=%d\n",i, current_indel_len, pair_number, indel_left_boundary-1, cursor_on_read);
{
int left_match = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6+ambiguous_i, 0, global_context->config.space_type);
int right_match = match_chro(read_text + cursor_on_read + ambiguous_i - min(current_indel_len,0), current_value_index, indel_left_boundary + ambiguous_i + max(0, current_indel_len), 6-ambiguous_i, 0,global_context->config.space_type);
if(left_match+right_match == best_matched_bases) ambiguous_count ++;
} }
//#warning "=========== COMMENT THIS LINE ==============="
if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0) int ambiguous_count=0;
SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%u\n",i, current_indel_len, pair_number, indel_left_boundary-1); //#warning " >>>>>>>> MAKE SURE DISABLING THE NEXT BLOCK IS HARMLESS <<<<<<<<< "
if(0){
int ambiguous_i, best_matched_bases = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) +
match_chro(read_text + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type);
for(ambiguous_i=-5; ambiguous_i<=5; ambiguous_i++)
{
int left_match = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6+ambiguous_i, 0, global_context->config.space_type);
int right_match = match_chro(read_text + cursor_on_read + ambiguous_i - min(current_indel_len,0), current_value_index, indel_left_boundary + ambiguous_i + max(0, current_indel_len), 6-ambiguous_i, 0,global_context->config.space_type);
if(left_match+right_match == best_matched_bases) ambiguous_count ++;
}
}
if(abs(current_indel_len)<=global_context -> config.max_indel_length) if(abs(current_indel_len)<=global_context -> config.max_indel_length)
{ {
chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, read_text + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0); chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, read_text + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0);
......
...@@ -499,6 +499,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex ...@@ -499,6 +499,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
} }
} }
if(argc > optind){
SUBREADprintf("Invalid parameter '%s'\n", argv[optind]);
return -1;
}
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection; global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
if(global_context->config.more_accurate_fusions) if(global_context->config.more_accurate_fusions)
{ {
......
...@@ -538,6 +538,12 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex ...@@ -538,6 +538,12 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
} }
} }
if(argc > optind){
SUBREADprintf("Invalid parameter '%s'\n", argv[optind]);
return -1;
}
if(global_context->config.is_SAM_file_input) global_context->config.phred_score_format = FASTQ_PHRED33; if(global_context->config.is_SAM_file_input) global_context->config.phred_score_format = FASTQ_PHRED33;
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection; global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
......
...@@ -2012,7 +2012,7 @@ int find_donor_receptor(global_context_t * global_context, thread_context_t * th ...@@ -2012,7 +2012,7 @@ int find_donor_receptor(global_context_t * global_context, thread_context_t * th
char out1pos[100]; char out1pos[100];
absoffset_to_posstr(global_context, search_in_chro_start, out1pos); absoffset_to_posstr(global_context, search_in_chro_start, out1pos);
if(1 || FIXLENstrcmp("chr14:105",out1pos)==0){ if(0 && FIXLENstrcmp("chr14:105",out1pos)==0){
SUBREADprintf("POS=%s\t\tINS=%d\t\t%s\n", out1pos, best_insertion_in_between, rname); SUBREADprintf("POS=%s\t\tINS=%d\t\t%s\n", out1pos, best_insertion_in_between, rname);
SUBREADprintf("R= %s\nS1=%s%s\nS2=%s%s\n %s|%s|\n\n", rtext, sp1s, chro_bases_startside, sp1s, chro_bases_endside, spE, spBB); SUBREADprintf("R= %s\nS1=%s%s\nS2=%s%s\n %s|%s|\n\n", rtext, sp1s, chro_bases_startside, sp1s, chro_bases_endside, spE, spBB);
} }
......
This diff is collapsed.
...@@ -372,7 +372,7 @@ typedef struct{ ...@@ -372,7 +372,7 @@ typedef struct{
short realign_flags; short realign_flags;
short final_quality; short final_quality;
short chromosomal_length; short chromosomal_length;
short MAPQ_adjustment;
} realignment_result_t; } realignment_result_t;
#define BUCKETED_TABLE_INIT_ITEMS 3 #define BUCKETED_TABLE_INIT_ITEMS 3
...@@ -619,7 +619,7 @@ unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_conte ...@@ -619,7 +619,7 @@ unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_conte
// The first base in the read actually has a larger coordinate than Pos. // The first base in the read actually has a larger coordinate than Pos.
unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar); unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar);
int chimeric_cigar_parts(global_context_t * global_context , unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * out_read_lens); int chimeric_cigar_parts(global_context_t * global_context , unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * out_read_lens, char * read_name);
void warning_file_limit(); void warning_file_limit();
void quick_sort(void * arr,int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r)); void quick_sort(void * arr,int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r));
...@@ -637,5 +637,6 @@ int FIXLENstrcmp(char * fixed_len, char * rname); ...@@ -637,5 +637,6 @@ int FIXLENstrcmp(char * fixed_len, char * rname);
int is_valid_digit(char * optarg, char * optname); int is_valid_digit(char * optarg, char * optname);
int is_valid_digit_range(char * optarg, char * optname, int min, int max_inc); int is_valid_digit_range(char * optarg, char * optname, int min, int max_inc);
int is_valid_float(char * optarg, char * optname);
int exec_cmd(char * cmd, char * outstr, int out_limit); int exec_cmd(char * cmd, char * outstr, int out_limit);
#endif #endif
...@@ -888,9 +888,12 @@ int match_chro(char * read, gene_value_index_t * index, unsigned int pos, int te ...@@ -888,9 +888,12 @@ int match_chro(char * read, gene_value_index_t * index, unsigned int pos, int te
case 'C': case 'C':
ret += tt==2; ret += tt==2;
break; break;
case 0:
//SUBREADprintf("NON-ATGC-CHAR:%d\n", tv);
//assert(0);
break;
default: default:
ret += tt==3; ret += tt==3;
break;
} }
offset_bit+=2; offset_bit+=2;
......
...@@ -61,6 +61,20 @@ void fastq_64_to_33(char * qs) ...@@ -61,6 +61,20 @@ void fastq_64_to_33(char * qs)
qs[i++] -= 31; qs[i++] -= 31;
} }
/*
 * Thread entry point used by delay_realloc().
 *
 * Sleeps for 100000 microseconds (100 ms) and then frees the buffer passed
 * in `ptr`. Always returns NULL.
 */
void * delay_run(void * ptr){
	usleep(100000);
	free(ptr);
	return NULL;
}

/*
 * Allocate a new buffer, copy the old contents into it, and release the old
 * buffer only after the delay implemented by delay_run(), instead of
 * immediately as realloc() would.
 * NOTE(review): the delay presumably keeps the old pointer usable for other
 * threads that may still be reading it -- confirm against the callers.
 *
 * Parameters:
 *   old_pntr - buffer to be replaced; may be NULL, in which case nothing is
 *              copied and nothing is scheduled for release.
 *   old_size - number of valid bytes in old_pntr.
 *   new_size - number of bytes wanted in the new buffer.
 *
 * Returns the new buffer, or NULL if the allocation failed. On failure
 * old_pntr is left untouched and still owned by the caller (same contract
 * as realloc()).
 */
void * delay_realloc(void * old_pntr, size_t old_size, size_t new_size){
	// Never request zero bytes: malloc(0) may legitimately return NULL, which
	// would be indistinguishable from an allocation failure.
	void * new_ret = malloc(new_size > 0 ? new_size : 1);
	if(NULL == new_ret) return NULL;

	// Nothing to copy and nothing to free later.
	if(NULL == old_pntr) return new_ret;

	// Copy no more than the new buffer can hold, so that shrinking is safe.
	size_t copy_size = old_size < new_size ? old_size : new_size;
	if(copy_size > 0) memcpy(new_ret, old_pntr, copy_size);

	pthread_t thread;
	if(0 == pthread_create(&thread, NULL, delay_run, old_pntr)){
		// Nobody joins this thread; detach it so that its resources are
		// reclaimed when it finishes instead of leaking on every call.
		pthread_detach(thread);
	}else{
		// No thread could be started: keep the same grace period by waiting
		// in the calling thread, so the old buffer is still released.
		delay_run(old_pntr);
	}
	return new_ret;
}
double guess_reads_density(char * fname, int is_sam) double guess_reads_density(char * fname, int is_sam)
{ {
return guess_reads_density_format(fname, is_sam, NULL, NULL, NULL); return guess_reads_density_format(fname, is_sam, NULL, NULL, NULL);
...@@ -1268,7 +1282,7 @@ void fix_cigar_SAM14(char * cig){ ...@@ -1268,7 +1282,7 @@ void fix_cigar_SAM14(char * cig){
} }
//This function returns 0 if the line is a mapped read; -1 if the line is in a wrong format and 1 if the read is unmapped. //This function returns 0 if the line is a mapped read; -1 if the line is in a wrong format and 1 if the read is unmapped.
int parse_SAM_line(char * sam_line, char * read_name, int * flags, char * chro, unsigned int * pos, char * cigar, int * mapping_quality, unsigned int * pair_dist, char * sequence , char * quality_string, int * rl, int * repeated) int parse_SAM_line(char * sam_line, char * read_name, int * flags, char * chro, unsigned int * pos, char * cigar, int * mapping_quality, unsigned int * pair_dist, char * sequence, char * quality_string, int * rl, int * repeated)
{ {
char cc; char cc;
int ci = 0, k=0, field=0, ret_quality = 0, ret_flag = 0, ret_pairdist=0; int ci = 0, k=0, field=0, ret_quality = 0, ret_flag = 0, ret_pairdist=0;
...@@ -1822,7 +1836,7 @@ int break_SAM_file(char * in_SAM_file, int is_BAM_file, char * temp_file_prefix, ...@@ -1822,7 +1836,7 @@ int break_SAM_file(char * in_SAM_file, int is_BAM_file, char * temp_file_prefix,
//SUBREADprintf("ARRI_0=%p ; OFFS=%p ; EVT=%p\n%s\n",array_index, offsets, event_table, line_buffer); //SUBREADprintf("ARRI_0=%p ; OFFS=%p ; EVT=%p\n%s\n",array_index, offsets, event_table, line_buffer);
int line_parse_result = parse_SAM_line(line_buffer, read_name, &flags, chro, &pos, cigar, & mapping_quality, &pairdist, sequence , quality_string, &rl, &repeated); int line_parse_result = parse_SAM_line(line_buffer, read_name, &flags, chro, &pos, cigar, & mapping_quality, &pairdist, sequence, quality_string, &rl, &repeated);
//SUBREADprintf("ARRI_2=%p ; OFFS=%p ; EVT=%p\n",array_index, offsets, event_table); //SUBREADprintf("ARRI_2=%p ; OFFS=%p ; EVT=%p\n",array_index, offsets, event_table);
if(strlen(quality_string)<2) if(strlen(quality_string)<2)
...@@ -1910,7 +1924,7 @@ int break_SAM_file(char * in_SAM_file, int is_BAM_file, char * temp_file_prefix, ...@@ -1910,7 +1924,7 @@ int break_SAM_file(char * in_SAM_file, int is_BAM_file, char * temp_file_prefix,
} }
// printf("INST: RL=%d; INSL=%d; READ_CUR=%d; IGNORE=%d\n", rl, insert_length, read_cursor , base_ignored_head_tail); // printf("INST: RL=%d; INSL=%d; READ_CUR=%d; IGNORE=%d\n", rl, insert_length, read_cursor , base_ignored_head_tail);
if(need_write && insert_length >= 5) if(need_write && insert_length >= 5 && sequence[0]!='*')
{ {
sprintf(temp_file_name, "%s%s", temp_file_prefix , temp_file_suffix); sprintf(temp_file_name, "%s%s", temp_file_prefix , temp_file_suffix);
temp_fp = get_temp_file_pointer(temp_file_name, fp_table, &close_now); temp_fp = get_temp_file_pointer(temp_file_name, fp_table, &close_now);
...@@ -1947,7 +1961,7 @@ int break_SAM_file(char * in_SAM_file, int is_BAM_file, char * temp_file_prefix, ...@@ -1947,7 +1961,7 @@ int break_SAM_file(char * in_SAM_file, int is_BAM_file, char * temp_file_prefix,
// the left edge ( last WANTED base ) is chromosome_cursor-1 // the left edge ( last WANTED base ) is chromosome_cursor-1
// the indel length is -tmpv; // the indel length is -tmpv;
// now we add this into the event table. // now we add this into the event table.
if(event_table) if(event_table && sequence[0]!='*')
add_cigar_indel_event(event_table, chro, chromosome_cursor-1, -tmpv, sequence + read_cursor); add_cigar_indel_event(event_table, chro, chromosome_cursor-1, -tmpv, sequence + read_cursor);
read_cursor += tmpv; read_cursor += tmpv;
tmpv = 0; tmpv = 0;
...@@ -2161,6 +2175,8 @@ void delete_with_prefix(char * prefix){ ...@@ -2161,6 +2175,8 @@ void delete_with_prefix(char * prefix){
strcpy(del_suffix , prefix); strcpy(del_suffix , prefix);
} }
//#warning ">>>>>>>> COMMENT THIS OUT <<<<<<<<<<<<<<<<<<<<<"
//SUBREADprintf("SCANDEL: %s, PREFIX %s, SUFFIX %s\n", del2, prefix, del_suffix);
if(strlen(del_suffix)>8) if(strlen(del_suffix)>8)
{ {
DIR *d; DIR *d;
...@@ -2173,11 +2189,13 @@ void delete_with_prefix(char * prefix){ ...@@ -2173,11 +2189,13 @@ void delete_with_prefix(char * prefix){
{ {
if(strstr(dir->d_name, del_suffix)) if(strstr(dir->d_name, del_suffix))
{ {
//SUBREADprintf("DEL:%s\n", dir->d_name);
strcpy(del_name, del2); strcpy(del_name, del2);
strcat(del_name, "/"); strcat(del_name, "/");
strcat(del_name, dir->d_name); strcat(del_name, dir->d_name);
unlink(del_name); unlink(del_name);
// #warning ">>>>>>>> COMMENT THIS OUT <<<<<<<<<<<<<<<<<<<<<"
// SUBREADprintf("DEL: %s\n", del_name);
//test fix //test fix
} }
} }
...@@ -2371,7 +2389,7 @@ int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_bu ...@@ -2371,7 +2389,7 @@ int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_bu
pairer -> input_fp = f_subr_open(in_file, "rb"); pairer -> input_fp = f_subr_open(in_file, "rb");
if(NULL == pairer -> input_fp) return 1; if(NULL == pairer -> input_fp) return 1;
SAM_pairer_warning_file_open_limit(pairer); SAM_pairer_warning_file_open_limit();
pairer -> input_is_BAM = BAM_input; pairer -> input_is_BAM = BAM_input;
pairer -> tiny_mode = is_Tiny_Mode; pairer -> tiny_mode = is_Tiny_Mode;
...@@ -2392,11 +2410,11 @@ int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_bu ...@@ -2392,11 +2410,11 @@ int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_bu
pairer -> appendix1 = appendix1; pairer -> appendix1 = appendix1;
_REPAIRER_delete_temp_prefix = tmp_path;
old_sig_TERM = signal (SIGTERM, REPAIR_SIGINT_hook); old_sig_TERM = signal (SIGTERM, REPAIR_SIGINT_hook);
old_sig_INT = signal (SIGINT, REPAIR_SIGINT_hook); old_sig_INT = signal (SIGINT, REPAIR_SIGINT_hook);
strcpy(pairer -> tmp_file_prefix, tmp_path); strcpy(pairer -> tmp_file_prefix, tmp_path);
_REPAIRER_delete_temp_prefix = pairer -> tmp_file_prefix;
pairer -> threads = malloc(all_threads * sizeof(SAM_pairer_thread_t)); pairer -> threads = malloc(all_threads * sizeof(SAM_pairer_thread_t));
memset(pairer -> threads, 0, all_threads * sizeof(SAM_pairer_thread_t)); memset(pairer -> threads, 0, all_threads * sizeof(SAM_pairer_thread_t));
...@@ -2481,7 +2499,9 @@ int SAM_pairer_read_BAM_block(FILE * fp, int max_read_len, char * inbuff) { ...@@ -2481,7 +2499,9 @@ int SAM_pairer_read_BAM_block(FILE * fp, int max_read_len, char * inbuff) {
unsigned char gz_header_12 [12]; unsigned char gz_header_12 [12];
//SUBREADprintf("STAT GZ POS=%llu\n", ftello(fp)); //SUBREADprintf("STAT GZ POS=%llu\n", ftello(fp));
int read_len = fread(gz_header_12, 1, 12, fp ); int read_len = fread(gz_header_12, 1, 12, fp );
if(read_len < 12) return -1; if(read_len < 12){
return -1;
}
if(gz_header_12[0]!=31 || gz_header_12[1]!=139){ if(gz_header_12[0]!=31 || gz_header_12[1]!=139){
SUBREADprintf("Unrecognized Gzip headers: %u, %u\nPlease make sure if the input file is in the BAM format.\n", gz_header_12[0], gz_header_12[1]); SUBREADprintf("Unrecognized Gzip headers: %u, %u\nPlease make sure if the input file is in the BAM format.\n", gz_header_12[0], gz_header_12[1]);
return -1; return -1;
...@@ -2569,26 +2589,31 @@ void SAM_pairer_fill_BIN_buff(SAM_pairer_context_t * pairer , SAM_pairer_thread ...@@ -2569,26 +2589,31 @@ void SAM_pairer_fill_BIN_buff(SAM_pairer_context_t * pairer , SAM_pairer_thread
// For SAM files: must be the full lines. // For SAM files: must be the full lines.
int current_buffer_used = 0; int current_buffer_used = 0;
int current_blocks = 0; int current_blocks = 0;
int last_read_len = -1, this_size;
if(pairer -> input_is_BAM){ if(pairer -> input_is_BAM){
while(1){ while(1){
if(feof(pairer -> input_fp)){ if( feof(pairer -> input_fp)){
*is_finished = 1; *is_finished = 1;
break; break;
} }
if(pairer -> input_buff_SBAM_size - current_buffer_used < MIN_BAM_BLOCK_SIZE) { if(pairer -> input_buff_SBAM_size - current_buffer_used < MIN_BAM_BLOCK_SIZE) {
break; break;
} }
int this_size = 0;
this_size = SAM_pairer_read_BAM_block( pairer -> input_fp , pairer -> input_buff_SBAM_size - current_buffer_used , thread_context -> input_buff_SBAM + current_buffer_used); this_size = SAM_pairer_read_BAM_block( pairer -> input_fp , pairer -> input_buff_SBAM_size - current_buffer_used , thread_context -> input_buff_SBAM + current_buffer_used);
current_blocks ++; current_blocks ++;
if(this_size >= 0) { if(this_size >= 0) {
current_buffer_used += this_size; current_buffer_used += this_size;
} else { } else {
if(feof(pairer -> input_fp) && last_read_len != -1 ){
pairer -> is_bad_format |= (last_read_len > 2);
pairer -> is_incomplete_BAM |= (last_read_len > 2);
//SUBREADprintf("BAM-FINISHED, CORRECT=%d (%d)\n", !pairer -> is_bad_format, last_read_len);
}
*is_finished = 1; *is_finished = 1;
break; break;
} }
last_read_len = this_size;
} }
}else{ }else{
current_buffer_used = SAM_pairer_read_SAM_MB(pairer -> input_fp , pairer -> input_buff_SBAM_size , thread_context -> input_buff_SBAM); current_buffer_used = SAM_pairer_read_SAM_MB(pairer -> input_fp , pairer -> input_buff_SBAM_size , thread_context -> input_buff_SBAM);
...@@ -2908,8 +2933,9 @@ int online_register_contig(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * ...@@ -2908,8 +2933,9 @@ int online_register_contig(SAM_pairer_context_t * pairer , SAM_pairer_thread_t *
if(refId < 0){ if(refId < 0){
refId = pairer->sam_contig_number_table->numOfElements; refId = pairer->sam_contig_number_table->numOfElements;
pairer -> output_header(pairer, thread_context -> thread_id, 0, 1 , header_sec , 8+reflen); pairer -> output_header(pairer, thread_context -> thread_id, 0, 1 , header_sec , 8+reflen);
char * mem_ref = malloc(reflen); char * mem_ref = malloc(reflen+1);
memcpy(mem_ref, ref, reflen); memcpy(mem_ref, ref, reflen);
mem_ref[reflen]=0;
HashTablePut(pairer->sam_contig_number_table, mem_ref, NULL + refId + 1); HashTablePut(pairer->sam_contig_number_table, mem_ref, NULL + refId + 1);
} }
subread_lock_release(&pairer -> output_header_lock); subread_lock_release(&pairer -> output_header_lock);
...@@ -4932,7 +4958,7 @@ int SAM_pairer_run( SAM_pairer_context_t * pairer){ ...@@ -4932,7 +4958,7 @@ int SAM_pairer_run( SAM_pairer_context_t * pairer){
}else for(corrected_run = 0; corrected_run < 2 ; corrected_run ++){ }else for(corrected_run = 0; corrected_run < 2 ; corrected_run ++){
SAM_pairer_run_once(pairer); SAM_pairer_run_once(pairer);
if(pairer -> is_bad_format && pairer->input_is_BAM){ if(pairer -> is_bad_format && pairer->input_is_BAM && ! pairer -> is_incomplete_BAM){
//#warning ">>>>>> REMOVE '+ 1' FROM NEXT LINE IN RELEASE <<<<<<" //#warning ">>>>>> REMOVE '+ 1' FROM NEXT LINE IN RELEASE <<<<<<"
assert(1 != corrected_run); assert(1 != corrected_run);
//#warning ">>>>>> COMMENT NEXT LINE IN RELEASE <<<<<<" //#warning ">>>>>> COMMENT NEXT LINE IN RELEASE <<<<<<"
...@@ -4946,7 +4972,7 @@ int SAM_pairer_run( SAM_pairer_context_t * pairer){ ...@@ -4946,7 +4972,7 @@ int SAM_pairer_run( SAM_pairer_context_t * pairer){
}else break; }else break;
} }
return 0; return pairer -> is_bad_format;
} }
int sort_SAM_create(SAM_sort_writer * writer, char * output_file, char * tmp_path) int sort_SAM_create(SAM_sort_writer * writer, char * output_file, char * tmp_path)
......
...@@ -145,6 +145,7 @@ typedef struct { ...@@ -145,6 +145,7 @@ typedef struct {
unsigned int BAM_l_text; unsigned int BAM_l_text;
unsigned int BAM_n_ref; unsigned int BAM_n_ref;
int is_unsorted_notified; int is_unsorted_notified;
int is_incomplete_BAM;
void (* reset_output_function) (void * pairer); void (* reset_output_function) (void * pairer);
int (* output_function) (void * pairer, int thread_no, char * rname, char * bin1, char * bin2); int (* output_function) (void * pairer, int thread_no, char * rname, char * bin1, char * bin2);
...@@ -307,4 +308,5 @@ int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_thre ...@@ -307,4 +308,5 @@ int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_thre
void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) ; void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) ;
int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value); int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value);
int SAM_pairer_warning_file_open_limit(); int SAM_pairer_warning_file_open_limit();
void *delay_realloc(void * old_pntr, size_t old_size, size_t new_size);
#endif #endif
...@@ -62,26 +62,26 @@ int mergeIntervals(unsigned int * intervals, unsigned int * result_ints, int nin ...@@ -62,26 +62,26 @@ int mergeIntervals(unsigned int * intervals, unsigned int * result_ints, int nin
stack_buffer_mem[stock_pointer*2+1] = intervals[1]; stack_buffer_mem[stock_pointer*2+1] = intervals[1];
stock_pointer++; stock_pointer++;
for (ii = 1 ; ii < nints; ii++) for (ii = 1 ; ii < nints; ii++)
{ {
//int top_start = stack_buffer_mem[stock_pointer*2-2]; //int top_start = stack_buffer_mem[stock_pointer*2-2];
int top_stop = stack_buffer_mem[stock_pointer*2-1]; int top_stop = stack_buffer_mem[stock_pointer*2-1];
if (top_stop < intervals[ii*2]) if (top_stop < intervals[ii*2])
{ {
stack_buffer_mem[stock_pointer * 2] = intervals[ii*2]; stack_buffer_mem[stock_pointer * 2] = intervals[ii*2];
stack_buffer_mem[stock_pointer * 2+1] = intervals[ii*2+1]; stack_buffer_mem[stock_pointer * 2+1] = intervals[ii*2+1];
stock_pointer++; stock_pointer++;
} }
else if (top_stop < intervals[ii*2+1]) else if (top_stop < intervals[ii*2+1])
{ {
top_stop = intervals[ii*2+1]; top_stop = intervals[ii*2+1];
stack_buffer_mem[stock_pointer*2-1] = top_stop; stack_buffer_mem[stock_pointer*2-1] = top_stop;
} }
// Otherwise the ii-th interval is useless because it is enclosed in the top item // Otherwise the ii-th interval is useless because it is enclosed in the top item
} }
return stock_pointer; return stock_pointer;
} }
#ifdef MAKE_TEST_INTERVL_MERGE #ifdef MAKE_TEST_INTERVL_MERGE
...@@ -126,6 +126,6 @@ int main() ...@@ -126,6 +126,6 @@ int main()
gaps = mergeIntervals(inbuff, outbuff, 6); gaps = mergeIntervals(inbuff, outbuff, 6);
print_gaps(outbuff, gaps); print_gaps(outbuff, gaps);
return 0; return 0;
} }
#endif #endif
SUBREAD_VERSION_BASE=1.5.0-p3 SUBREAD_VERSION_BASE=1.5.1
SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y") SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y")
SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)" SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)"
SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)" SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)"
......
This diff is collapsed.
geNeiD 0 Geneid data/test-junc.sam
simu_gene1 0 simu_gene1 0
simu_gene2 0 simu_gene2 0
simu_gene3 0 simu_gene3 0
simu_gene4 0 simu_gene4 0
simu_gene5 15 simu_gene5 13
simu_gene6 19 simu_gene6 21
simu_gene7 157 simu_gene7 145
g1 chr3 1000 2000 +
g1 chr3 5000 6000 +
g2 chr3 1000 2000 +
g2 chr3 3000 4000 +
g3 chr3 1000 2000 +
@SQ SN:chr3 LN:999950
R1 0 chr3 1000 40 100M * 0 0 N I NH:i:3 HI:i:1
R1 0 chr3 3000 40 100M * 0 0 N I NH:i:3 HI:i:2
R1 0 chr3 5000 40 100M * 0 0 N I NH:i:3 HI:i:3
R2 0 chr3 1000 40 100M * 0 0 N I NH:i:1 HI:i:1
R3 0 chr3 3000 40 100M * 0 0 N I NH:i:2 HI:i:1
R3 0 chr3 9000 40 100M * 0 0 N I NH:i:2 HI:i:2
Geneid test-fracOverlap.sam
g1 1
g2 0
g3 0
read1 99 chr3 4950 40 60M300N40M = 4970 120 N I
read1 147 chr3 4970 40 40M300N60M = 4950 120 N I
read1 99 chr3 4955 40 60M300N40M = 4975 120 N I
read1 147 chr3 4975 40 40M300N60M = 4955 120 N I
...@@ -21,7 +21,8 @@ $SH_CMD data/compare.sh data/test-junc.sam data/corner-BothEnds.ora data/test-mi ...@@ -21,7 +21,8 @@ $SH_CMD data/compare.sh data/test-junc.sam data/corner-BothEnds.ora data/test-mi
$SH_CMD data/compare.sh data/test-junc.sam data/corner-Chimeric.ora data/test-minimum.SAF "-p -F SAF -C " "disallowing chimeric fragments" $SH_CMD data/compare.sh data/test-junc.sam data/corner-Chimeric.ora data/test-minimum.SAF "-p -F SAF -C " "disallowing chimeric fragments"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-MultiMapping.ora data/test-minimum.SAF "-p -F SAF -M " "Allowing multi-mapped reads" $SH_CMD data/compare.sh data/test-junc.sam data/corner-MultiMapping.ora data/test-minimum.SAF "-p -F SAF -M " "Allowing multi-mapped reads"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-DoNotSort.ora data/test-minimum.SAF " -p -F SAF --donotsort " "not sorting input file" $SH_CMD data/compare.sh data/test-junc.sam data/corner-DoNotSort.ora data/test-minimum.SAF " -p -F SAF --donotsort " "not sorting input file"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-MinOverlap.ora data/test-minimum.SAF " --minOverlap 185 -p -F SAF " "minimum overlapping length" $SH_CMD data/compare.sh data/test-junc.sam data/corner-MinOverlap.ora data/test-minimum.SAF " --minOverlap 125 -p -F SAF " "minimum overlapping length"
$SH_CMD data/compare.sh data/test-fracOverlap.sam data/test-fracOverlap.ora data/corner-fractions.SAF " --fracOverlap 0.62 -O -p -F SAF " "minimum overlapping fraction"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-LargestOverlap.ora data/test-minimum.SAF "-p -F SAF --largestOverlap" "Largest Overlapping" $SH_CMD data/compare.sh data/test-junc.sam data/corner-LargestOverlap.ora data/test-minimum.SAF "-p -F SAF --largestOverlap" "Largest Overlapping"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-PEdist.ora data/test-minimum.SAF " -p -F SAF -B -C -P -d 130 -D 770 " "paired-end distance" $SH_CMD data/compare.sh data/test-junc.sam data/corner-PEdist.ora data/test-minimum.SAF " -p -F SAF -B -C -P -d 130 -D 770 " "paired-end distance"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-Read2Pos5.ora data/test-minimum.SAF " -p -F SAF --read2pos 5 " "Read to position (5' end)" $SH_CMD data/compare.sh data/test-junc.sam data/corner-Read2Pos5.ora data/test-minimum.SAF " -p -F SAF --read2pos 5 " "Read to position (5' end)"
...@@ -32,6 +33,7 @@ $SH_CMD data/compare.sh data/test-junc.sam data/corner-MaxOPs.ora data/test-mini ...@@ -32,6 +33,7 @@ $SH_CMD data/compare.sh data/test-junc.sam data/corner-MaxOPs.ora data/test-mini
$SH_CMD data/compare.sh data/test-junc.sam data/corner-MinMAPQ.ora data/test-minimum.SAF " -p -F SAF -Q 58" "minimum mapping quality" $SH_CMD data/compare.sh data/test-junc.sam data/corner-MinMAPQ.ora data/test-minimum.SAF " -p -F SAF -Q 58" "minimum mapping quality"
$SH_CMD data/compare.sh data/test-dup.sam data/corner-IgnoreDup.ora data/test-minimum.SAF "-p -F SAF --ignoreDup " "Ignoring duplicated reads" $SH_CMD data/compare.sh data/test-dup.sam data/corner-IgnoreDup.ora data/test-minimum.SAF "-p -F SAF --ignoreDup " "Ignoring duplicated reads"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-Fraction.ora data/test-minimum.SAF "-p -F SAF --fraction -M " "Fraction counting" $SH_CMD data/compare.sh data/test-junc.sam data/corner-Fraction.ora data/test-minimum.SAF "-p -F SAF --fraction -M " "Fraction counting"
$SH_CMD data/compare.sh data/corner-fractions.sam data/corner-fractions.ora data/corner-fractions.SAF " -O -M -F SAF --fraction " "Advanced fractions"
$SH_CMD data/compare.sh data/test-junc.sam data/corner-Jcounts.ora data/test-minimum.SAF "-p -F SAF -J " "Junction counting" JC $SH_CMD data/compare.sh data/test-junc.sam data/corner-Jcounts.ora data/test-minimum.SAF "-p -F SAF -J " "Junction counting" JC
if test -f /usr/local/work/liao/subread/chromosomes/all_34_alt.fa if test -f /usr/local/work/liao/subread/chromosomes/all_34_alt.fa
...@@ -57,7 +59,7 @@ $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-UNSTR.ora data/t ...@@ -57,7 +59,7 @@ $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-UNSTR.ora data/t
# test 5' and 3' end extension # test 5' and 3' end extension
$SH_CMD data/compare.sh data/test-chrname.sam data/test-minimum-dup.ora data/test-minimum.GTF " -p --ignoreDup " "Ignoring duplicate fragments"