New upstream version 1.6.3+dfsg

parent b354f0db
This diff is collapsed.
......@@ -839,6 +839,7 @@ int mac_str(char * str_buff)
}
return ret;
#else
#if defined(IFHWADDRLEN)
struct ifreq ifr;
struct ifconf ifc;
char buf[1024];
......@@ -878,6 +879,7 @@ int mac_str(char * str_buff)
}
return 0;
}
#endif
return 1;
#endif
#endif
......@@ -1118,7 +1120,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
if(!is_GFF_geneid_warned){
int ext_att_len = strlen(extra_attrs);
if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
SUBREADprintf("\nWarning: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s' \nThe attributes included in your GTF annotation are '%s' \n\n", gene_id_column, extra_attrs);
SUBREADprintf("\nERROR: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\nThe program has to terminate.\n\n", gene_id_column, extra_attrs);
}
is_GFF_geneid_warned++;
}
......@@ -1127,7 +1129,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
if(!is_GFF_txid_warned){
int ext_att_len = strlen(extra_attrs);
if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
SUBREADprintf("\nWarning: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s' \nThe attributes included in your GTF annotation are '%s' \n\n", transcript_id_column, extra_attrs);
SUBREADprintf("\nERROR: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified transcript identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\nThe program has to terminate\n\n", transcript_id_column, extra_attrs);
}
is_GFF_txid_warned++;
}
......@@ -1141,7 +1143,13 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
}
autozip_close(&afp);
free(file_line);
free(file_line);
if(is_GFF_txid_warned || is_GFF_geneid_warned)return -2;
if(loaded_features<1){
SUBREADprintf("\nERROR: No feature was loaded from the annotation file. Please check if the annotation format was correctly specified, and also if the feature type was correctly specified if the annotation is in the GTF format.\n\n");
return -2;
}
return loaded_features;
}
......
......@@ -14,11 +14,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
mv repair subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -70,9 +70,6 @@ subread-fullscan: fullscan.c ${ALL_OBJECTS}
subtools: subtools.c ${ALL_OBJECTS}
${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
clean:
rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS}
......@@ -8,7 +8,7 @@ include makefile.version
-include ~/.R/DBPZ_debug_makefile
CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
LDFLAGS = ${STATIC_MAKE} -pthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
LDFLAGS = ${STATIC_MAKE} -pthread -lz ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -lm
CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
......@@ -17,11 +17,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount flattenGTF # samMappedBases mergeVCF testZlib
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
mv detectionCall repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -85,8 +85,5 @@ qualityScores: qualityScores.c ${ALL_OBJECTS}
subread-fullscan: fullscan.c ${ALL_OBJECTS}
${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
clean:
rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS}
......@@ -11,11 +11,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount flattenGTF # globalReassembly testZlib
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
mv repair subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -70,9 +70,6 @@ qualityScores: qualityScores.c ${ALL_OBJECTS}
subread-fullscan: fullscan.c ${ALL_OBJECTS}
${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
testZlib: test-seek-zlib.c ${ALL_OBJECTS}
${CC} -o testZlib test-seek-zlib.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -476,8 +476,7 @@ void fishers_test_on_POI(struct SNP_Calling_Parameters * parameters, float * snp
}
}
}
void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * snp_fisher_raw, unsigned int * snp_voting_piles, char * referenced_genome, unsigned int reference_len, double multiplex_base, char * SNP_bitmap_recorder, unsigned short * snp_filter_background_matched, unsigned short * snp_filter_background_unmatched, int all_result_needed)
{
void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * snp_fisher_raw, unsigned int * snp_voting_piles, char * referenced_genome, unsigned int reference_len, double multiplex_base, char * SNP_bitmap_recorder, unsigned short * snp_filter_background_matched, unsigned short * snp_filter_background_unmatched, int all_result_needed, char * chro_name, unsigned int block_start) {
int i, j, is_fresh_jumppd, remove_old;
int a=0, b=0, c=0, d=0, go_ahead = 0, left_tail = 0;
......@@ -547,6 +546,7 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
float observed_coverage = (b+d) *1./(1. + 2 * parameters -> fisher_exact_testlen) ;
double p_cutoff = pow(10, -(observed_coverage/multiplex_base));
p_cutoff = min(parameters -> cutoff_upper_bound, p_cutoff);
p_cutoff = max(1E-320, p_cutoff);
int flanking_unmatched = b-a;
int flanking_matched = d-c;
......@@ -557,8 +557,11 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
}
float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
if(0 && flanking_matched > 10000 && p_middle < 1E-5)
SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
unsigned int chro_pos = 1 + block_start + i;
if(0 && chro_pos < 11109861+10 && chro_pos > 11109861-10)
SUBREADprintf("TEST: %s : %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", chro_name, chro_pos ,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
if(all_result_needed || ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16))
snp_fisher_raw [i] = p_middle;
else snp_fisher_raw [i] = -999;
......@@ -663,7 +666,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
pcutoff_list[i]=-1.;
}
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome, chro_name, offset);
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome, chro_name, offset - reference_len);
fclose(tmp_fp);
if (parameters -> delete_piles)
......@@ -832,10 +835,10 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
if(parameters -> fisher_exact_testlen)
{
fishers_test_on_block(parameters, snp_fisher_raw, snp_voting_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, snp_filter_background_matched, snp_filter_background_unmatched, 0);
fishers_test_on_block(parameters, snp_fisher_raw, snp_voting_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, snp_filter_background_matched, snp_filter_background_unmatched, 0, chro_name, BASE_BLOCK_LENGTH*block_no +1);
if(parameters->background_input_file[0])
{
fishers_test_on_block(parameters, snp_fisher_BGC, snp_BGC_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, NULL, NULL, 1);
fishers_test_on_block(parameters, snp_fisher_BGC, snp_BGC_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, NULL, NULL, 1, chro_name, BASE_BLOCK_LENGTH*block_no +1);
fishers_test_on_POI(parameters, snp_fisher_VS, snp_voting_piles, snp_BGC_piles, referenced_genome, reference_len, snp_fisher_raw);
}
}
......@@ -1476,7 +1479,7 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
}
if(parameters -> background_input_file[0])
{
char temp_file_prefix2[300];
char temp_file_prefix2[350];
sprintf(temp_file_prefix2, "%sBGC-", temp_file_prefix);
if(break_SAM_file(parameters -> background_input_file, parameters -> is_BAM_file_input, temp_file_prefix2, NULL, NULL, known_chromosomes, 1, parameters -> bases_ignored_head_tail, parameters->subread_index_array, parameters->subread_index_offsets, NULL, NULL, NULL)) return -1;
}
......
This diff is collapsed.
......@@ -160,41 +160,33 @@ int BINsearch_event(chromosome_event_t * event_space, int * event_ids, int is_sm
}
}
#define ANTI_SUPPORTING_READ_LIMIT 100
int anti_supporting_read_scan(global_context_t * global_context)
{
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
if(indel_context->total_events<1)return 0;
chromosome_event_t * event_space = indel_context -> event_space_dynamic;
int x1, * small_side_ordered_event_ids, * large_side_ordered_event_ids, cancelled_events=0, * cancelled_event_list = malloc(sizeof(int)*ANTI_SUPPORTING_READ_LIMIT), x2;
small_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
large_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
typedef struct{
int thread_id;
int block_start;
int block_end;
HashTable * result_tab;
int * small_side_ordered_event_ids, * large_side_ordered_event_ids;
chromosome_event_t * event_space;
global_context_t * global_context;
} AT_context_t;
for(x1=0; x1<indel_context->total_events; x1++)
{
small_side_ordered_event_ids[x1]=x1;
large_side_ordered_event_ids[x1]=x1;
}
void * sort_data[3];
sort_data[0] = small_side_ordered_event_ids;
sort_data[1] = event_space;
sort_data[2] = (void *)0xffff; // small_side_ordering
#define ANTI_SUPPORTING_READ_LIMIT 100
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
void * anti_support_thread_run(void * v_cont){
AT_context_t * atcont = v_cont;
sort_data[0] = large_side_ordered_event_ids;
sort_data[2] = NULL; // large_side_ordering
int * cancelled_event_list = malloc(sizeof(int)*ANTI_SUPPORTING_READ_LIMIT), x2;
int * small_side_ordered_event_ids = atcont -> small_side_ordered_event_ids, * large_side_ordered_event_ids = atcont -> large_side_ordered_event_ids;
global_context_t * global_context = atcont -> global_context;
chromosome_event_t * event_space = atcont -> event_space;
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
if(indel_context->total_events<1)return NULL;
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
int current_read_number, cancelled_events, x1;
int current_read_number;
for(current_read_number = 0; current_read_number < global_context -> processed_reads_in_chunk; current_read_number++)
for(current_read_number = atcont -> block_start; current_read_number < atcont -> block_end; current_read_number++)
{
int is_second_read;
for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
......@@ -228,8 +220,9 @@ int anti_supporting_read_scan(global_context_t * global_context)
if(event_body -> event_small_side <= coverage_start + 5) continue;
if(event_body -> event_small_side >= coverage_end - 5) continue;
event_body -> anti_supporting_reads ++;
//printf("OCT27-ANTISUP-READ @ %u has SMALL @ %u~%u , INDELS = %d, ASUP = %d\n", coverage_start - 1, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, event_body -> anti_supporting_reads);
long long cur_count = HashTableGet(atcont -> result_tab , NULL+small_side_ordered_event_ids[x1]+1 ) - NULL;
cur_count++;
HashTablePut( atcont -> result_tab , NULL+small_side_ordered_event_ids[x1]+1 , NULL+cur_count );
cancelled_event_list[cancelled_events++] = small_side_ordered_event_ids[x1];
}
......@@ -251,14 +244,85 @@ int anti_supporting_read_scan(global_context_t * global_context)
if(to_be_add){
// printf("OCT27-ANTISUP-READ @ %u has LARGE @ %u~%u, INDELS = %d, ASUP = %d\n", coverage_end, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, event_body -> anti_supporting_reads);
event_body -> anti_supporting_reads ++;
long long cur_count = HashTableGet(atcont -> result_tab , NULL+large_side_ordered_event_ids[x1]+1 ) - NULL;
cur_count++;
HashTablePut( atcont -> result_tab , NULL+large_side_ordered_event_ids[x1]+1 , NULL+cur_count );
}
}
}
}
bigtable_release_result(global_context, NULL, current_read_number, 0);
}
free(cancelled_event_list);
return NULL;
}
void anti_support_add_count(void * ky, void * va, HashTable * tab){
chromosome_event_t * event_space = tab -> appendix1 ;
int eno = ky-NULL-1;
chromosome_event_t * event_body = event_space + eno;
event_body -> anti_supporting_reads+= (va - NULL);
}
int anti_supporting_read_scan(global_context_t * global_context)
{
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
if(indel_context->total_events<1)return 0;
chromosome_event_t * event_space = indel_context -> event_space_dynamic;
int x1, * small_side_ordered_event_ids, * large_side_ordered_event_ids;
small_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
large_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
for(x1=0; x1<indel_context->total_events; x1++) {
small_side_ordered_event_ids[x1]=x1;
large_side_ordered_event_ids[x1]=x1;
}
void * sort_data[3];
sort_data[0] = small_side_ordered_event_ids;
sort_data[1] = event_space;
sort_data[2] = (void *)0xffff; // small_side_ordering
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
sort_data[0] = large_side_ordered_event_ids;
sort_data[2] = NULL; // large_side_ordering
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
AT_context_t ATconts[64];
pthread_t AThreads[64];
int thread_no, last_end = 0;
for(thread_no=0; thread_no< global_context -> config.all_threads; thread_no++){
AT_context_t * atc = ATconts + thread_no;
atc -> thread_id = thread_no;
atc -> block_start = last_end;
atc -> block_end = last_end = global_context -> processed_reads_in_chunk / global_context -> config.all_threads * thread_no;
if(thread_no == global_context -> config.all_threads-1) atc -> block_end = global_context -> processed_reads_in_chunk;
atc -> global_context = global_context;
atc -> result_tab = HashTableCreate(200000);
atc -> small_side_ordered_event_ids = small_side_ordered_event_ids;
atc -> large_side_ordered_event_ids = large_side_ordered_event_ids;
atc -> event_space = event_space;
pthread_create(AThreads + thread_no , NULL, anti_support_thread_run, atc);
}
for(thread_no=0; thread_no< global_context -> config.all_threads; thread_no++){
pthread_join(AThreads[thread_no], NULL);
ATconts[thread_no].result_tab -> appendix1 = event_space;
HashTableIteration( ATconts[thread_no].result_tab, anti_support_add_count );
HashTableDestroy(ATconts[thread_no].result_tab);
}
free(small_side_ordered_event_ids);
free(large_side_ordered_event_ids);
......@@ -2799,7 +2863,7 @@ int rectify_read_text(global_context_t * global_context, reassembly_by_voting_bl
unsigned int subread_int = 0, xk2;
for(xk2=0; xk2< global_context -> config.reassembly_subread_length; xk2++)
subread_int = (subread_int << 2) | base2int(next_read_txt[read_offset + xk2]);
gehash_go_q(index, subread_int , read_offset, read_len, 0, block_context -> vote_list_rectify, 22, 0, read_offset, 0, 0x7fffffff);
gehash_go_q(index, subread_int , read_offset, read_len, 0, block_context -> vote_list_rectify, 22, read_offset, 0, 0x7fffffff);
}
memset(block_context -> read_rectify_space, 0, MAX_READ_LENGTH * 4 * sizeof(short));
......@@ -2929,7 +2993,7 @@ int search_window_once(global_context_t * global_context, reassembly_by_voting_b
if(global_context -> config.reassembly_tolerable_voting)
gehash_go_q_tolerable(&block_context->voting_indexes[window_no], subread_int , read_offset, read_len, 0, block_context -> vote_list, 1, 22, 24, 0, read_offset, global_context -> config.reassembly_tolerable_voting, global_context -> config.reassembly_subread_length, 0, 0x7fffffff);
else
gehash_go_q(&block_context->voting_indexes[window_no], subread_int , read_offset, read_len, 0, block_context -> vote_list, 22, 0, read_offset, 0, 0x7fffffff);
gehash_go_q(&block_context->voting_indexes[window_no], subread_int , read_offset, read_len, 0, block_context -> vote_list,0,read_offset, 0, 0x7fffffff);
}
int WINDOW_SEARCH_TREE_WIDTH = 1;
......
......@@ -200,4 +200,5 @@ chromosome_event_t * local_add_indel_event(global_context_t * global_context, th
void print_indel_table(global_context_t * global_context);
int sort_junction_entry_table(global_context_t * global_context);
void mark_event_bitmap(unsigned char * bitmap, unsigned int pos);
int check_event_bitmap(unsigned char * bitmap, unsigned int pos);
#endif
......@@ -49,6 +49,9 @@ static struct option long_options[] =
{"reportPairedMultiBest", no_argument, 0, 0},
{"sv", no_argument, 0, 0},
{"longDel", no_argument, 0, 0},
{"exonAnnotationScreenOut", required_argument, 0, 0},
{"gtfFeature", required_argument, 0, 0},
{"gtfAttr", required_argument, 0, 0},
{"extraColumns", no_argument, 0, 0},
{"forcedPE", no_argument, 0, 0},
{"ignoreUnmapped", no_argument, 0, 0},
......@@ -596,6 +599,9 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.realignment_minimum_variant_distance = 1;
// global_context->config.max_indel_length = max(16, global_context->config.max_indel_length);
}
else if(strcmp("exonAnnotationScreenOut", long_options[option_index].name)==0){
strcpy(global_context->config.exon_annotation_file_screen_out, optarg);
}
else if(strcmp("minVoteCutoff", long_options[option_index].name)==0)
{
global_context->config.max_vote_number_cutoff = atoi(optarg);
......
......@@ -32,6 +32,7 @@ static struct option long_options[] =
{"junctionIns", required_argument, 0, 0},
{"multi", required_argument, 0, 'B'},
{"exonAnnotation", required_argument, 0, 'a'},
{"exonAnnotationScreenOut", required_argument, 0, 0},
{"exonAlias", required_argument, 0, 'A'},
{"exonFormat", required_argument, 0, 'F'},
{"gtfAttr", required_argument, 0, 0},
......@@ -113,13 +114,13 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs("# thresholds for mapping");
SUBREADputs("");
SUBREADputs(" -n <int> Number of selected subreads, 10 by default.");
SUBREADputs(" -n <int> Number of selected subreads, 14 by default.");
SUBREADputs("");
SUBREADputs(" -m <int> Consensus threshold for reporting a hit (minimal number of");
SUBREADputs(" subreads that map in consensus) . If paired-end, this gives");
SUBREADputs(" the consensus threshold for the anchor read (anchor read");
SUBREADputs(" receives more votes than the other read in the same pair).");
SUBREADputs(" 3 by default");
SUBREADputs(" 1 by default.");
SUBREADputs("");
SUBREADputs(" -p <int> Consensus threshold for the non- anchor read in a pair. 1 by");
SUBREADputs(" default.");
......@@ -629,6 +630,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.max_vote_number_cutoff = atoi(optarg);
}
else if(strcmp("exonAnnotationScreenOut", long_options[option_index].name)==0){
strcpy(global_context->config.exon_annotation_file_screen_out, optarg);
}
else if(strcmp("allJunctions", long_options[option_index].name)==0)
{
global_context->config.do_fusion_detection = 1;
......
This diff is collapsed.
This diff is collapsed.
......@@ -93,6 +93,7 @@ typedef struct{
typedef struct{
int is_paired_end_reads;
int is_internal_error;
gene_input_t first_read_file;
gene_input_t second_read_file;
unsigned long long first_read_file_size;
......@@ -140,6 +141,7 @@ typedef struct{
char first_read_file[MAX_FILE_NAME_LENGTH];
char second_read_file[MAX_FILE_NAME_LENGTH];
char exon_annotation_file[MAX_FILE_NAME_LENGTH];
char exon_annotation_file_screen_out[MAX_FILE_NAME_LENGTH];
char exon_annotation_alias_file[MAX_FILE_NAME_LENGTH];
int exon_annotation_file_type;
char exon_annotation_gene_id_column[MAX_READ_NAME_LEN];
......@@ -448,6 +450,19 @@ typedef struct {
} bigtable_cached_result_t;
typedef struct{
void * junction_tmp_r1;
void * junction_tmp_r2;
void * alignment_tmp_r1;
void * alignment_tmp_r2;
void * vote_simple_1_buffer;
void * vote_simple_2_buffer;
void * comb_buffer;
} topK_buffer_t;
typedef struct{
unsigned long long all_correct_PE_reads;
int thread_id;
......@@ -468,6 +483,8 @@ typedef struct{
unsigned int not_properly_pairs_only_one_end_mapped;
unsigned int all_multimapping_reads;
unsigned int all_uniquely_mapped_reads;
topK_buffer_t topKbuff;
} thread_context_t;
......@@ -512,6 +529,7 @@ typedef struct{
long long bigtable_cache_file_loaded_fragments_begin;
long long bigtable_cache_file_fragments;
bigtable_cached_result_t * bigtable_cache;
char *bigtable_cache_malloc_ptr, *bigtable_cache_malloc_ptr_junc;
unsigned int bigtable_chunked_fragments;
int bigtable_dirty_data;
......@@ -559,6 +577,7 @@ typedef struct{
bucketed_table_t translocation_result_table;
bucketed_table_t inversion_result_table;
topK_buffer_t topKbuff;
// per chunk parameters
subread_read_number_t read_block_start;
......
......@@ -113,7 +113,7 @@ void add_chro(char *sam_h)
SUBREADprintf("Added a new chromosome : %s [%u]\n", chro_name, chro_len);
}
void get_read_info(char * fl, char * chro, unsigned int * pos_0base , char * cigar, int *flags, int * tlen, char ** rtext){
void get_read_info(char * fl, char * chro, unsigned int * pos_1base , char * cigar, int *flags, int * tlen, char ** rtext){
char * tmp_tok = NULL;
char * tmp_res = strtok_r(fl, "\t", &tmp_tok);
......@@ -125,7 +125,7 @@ void get_read_info(char * fl, char * chro, unsigned int * pos_0base , char * cig
strcpy(chro, tmp_res);
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // pos_0base
(*pos_0base) = atoi(tmp_res);
(*pos_1base) = atoi(tmp_res);
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // qual
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // cigar
......@@ -161,7 +161,11 @@ int covCalc()
coverage_bin_entry_t * chrbin12[2];
SamBam_FILE * in_fp = SamBam_fopen(input_file_name, is_BAM_input?SAMBAM_FILE_BAM:SAMBAM_FILE_SAM);
char * line_buffer = malloc(3000);
int this_hits = 0, r1_items = 0;
char last_chro[200],chro[200], *rtext = NULL;
last_chro[0]=0;
while(1)
{
char * is_ret = SamBam_fgets(in_fp, line_buffer, 2999, 1);
......@@ -177,31 +181,44 @@ int covCalc()
int flags=0, x1, is_junc = 0, tlen = 0;
char cigar_str[200];
char chro[200], *rtext = NULL;
unsigned int pos_0base = 0;
cigar_str[0]=0;
if(!paired_end) this_hits = 0;
if(paired_end && (reads%2) == 0){
this_hits = 0;
r1_items = 0;
chrbin12[0] = chrbin12[1] = NULL;
}
if(paired_end && (reads%2) == 1)
strcpy(last_chro, chro);
chro[0]=0;
int this_hits = 0;
get_read_info(line_buffer, chro, &pos_0base, cigar_str, &flags, &tlen, &rtext);
pos_0base --;
if(0 && FIXLENstrcmp("NS500643:166:HNCCHBGXY:1:23105:1714:15599" , line_buffer)==0)
SUBREADprintf("INPUT READ_%d: FLAG=%d, chro=%s, pos=%u\n", 1+(reads % 2 ), flags, chro, pos_0base);
if((flags & 4) || ( paired_end && (tlen == 0 || abs(tlen) > 500000) )){
reads++;
continue;
}
//if((flags & 4) || ( paired_end && (tlen == 0 || abs(tlen) > 500000) )){
// reads++;
// continue;
//}
coverage_bin_entry_t * chrbin = NULL;
unsigned int chrlen = 0;
int cigar_sections = 0;
if((flags & 4)==0){
void ** bin_entry = HashTableGet(cov_bin_table, chro);
if(NULL == bin_entry)
{
SUBREADprintf("ERROR: The chromosome name is not in header:%s\n", chro);
}
void ** bin_entry = HashTableGet(cov_bin_table, chro);
if(NULL == bin_entry)
{
SUBREADprintf("ERROR: The chromosome name is not in header:%s\n", chro);
chrbin = (coverage_bin_entry_t*) bin_entry[0];
chrlen = (void *)( bin_entry[1]) - NULL;
cigar_sections = RSubread_parse_CIGAR_string(chro, pos_0base, cigar_str, max_M, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
}
coverage_bin_entry_t * chrbin = (coverage_bin_entry_t*) bin_entry[0];
unsigned int chrlen = (void *)( bin_entry[1]) - NULL;
int cigar_sections = RSubread_parse_CIGAR_string(chro, pos_0base, cigar_str, max_M, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
if(paired_end) {
for(x1 = 0; x1 < cigar_sections; x1++){
unsigned int x2,x3;
......@@ -213,15 +230,18 @@ int covCalc()
int read_cursor = Staring_Read_Points[x1];
for(x2 = Staring_Points[x1]; x2<Staring_Points[x1]+Section_Lengths[x1]; x2++){
int found = 0;
for(x3=0; x3<this_hits; x3++){
if(this_hit_locs[x3] == x2){
found =1;
break;
if(reads%2 == 1 && strcmp(chro,last_chro)==0)
for(x3=0; x3<this_hits; x3++){
if(this_hit_locs[x3] == x2){
found =1;
break;
}
}
}
if(!found){
this_hit_bases[this_hits] = base_value_to_int(rtext[read_cursor]);
this_hit_locs[this_hits++] = x2;
if(reads % 2 == 0) r1_items ++;
}
if(this_hits >= MAX_FRAGMENT_LENGTH){
SUBREADprintf("ERROR: read is too long : %s!\n", cigar_str);
......@@ -231,25 +251,14 @@ int covCalc()
}
}
if(this_hits > 0) assert(chrbin);
chrbin12[reads%2] = chrbin;
} else for(x1 = 0; x1 < cigar_sections; x1++) {
unsigned int x2;
if(0 && FIXLENstrcmp("NS500643:166:HNCCHBGXY:4:23507:11772:18798" , line_buffer)==0)
SUBREADprintf("TEST_ONE %s [%d] : %u ~ %u (chr_len=%u) ; read_seq = [%d] %s\n", line_buffer, x1, Staring_Points[x1], x2<Staring_Points[x1]+Section_Lengths[x1], chrlen, Staring_Read_Points[x1], rtext + Staring_Read_Points[x1]);
if(0 && strcmp("chr12",chro) == 0 && Staring_Points[x1] <= 113994516 && Staring_Points[x1] + Section_Lengths[x1] > 113994516){
int rpos_0 = 113994516 - Staring_Points[x1] + Staring_Read_Points[x1];
SUBREADprintf(" TEST_BASE : %s : val=%c\n", line_buffer, rtext[rpos_0]);
}
int read_cursor = Staring_Read_Points[x1];
for(x2 = Staring_Points[x1]; x2<Staring_Points[x1]+Section_Lengths[x1]; x2++) {
if(x2 < chrlen) {
if(base_values){
chrbin[x2*5 + base_value_to_int(rtext[read_cursor])] ++;
if(0 && FIXLENstrcmp("NS500643:166:HNCCHBGXY:4:23507:11772:18798" , line_buffer)==0)
SUBREADprintf(" TEST_MY_BASE : [%d 0_base] = %c (%d) ; res=%d\n", x2, rtext[read_cursor], base_value_to_int(rtext[read_cursor]), chrbin[x2*5 + base_value_to_int(rtext[read_cursor])]);
}
else if(chrbin[x2] <= COVERAGE_MAX_INT)chrbin[x2] ++;
all_counted ++;
......@@ -263,14 +272,17 @@ int covCalc()
read_cursor ++;
}
}
if(reads % 2 == 1){
int r,x1;
for(r = 0; r < 2; r++){
if(this_hits>0)assert(chrbin12[r]);
for(x1 = 0; x1<this_hits;x1++){
if(base_values) chrbin12[r][this_hit_locs[x1]*5 + this_hit_bases[x1]]++;
else chrbin12[r][this_hit_locs[x1]]++;
}
//SUBREADprintf("SREAD_%d , R1ITEMS=%d, R1+2ITEMS=%d, BIN1=%p, BIN2=%p\n", reads, r1_items, this_hits, chrbin12[0], chrbin12[1]);
if((reads % 2) == 1 && paired_end){
int x1;
for(x1 = 0; x1<this_hits;x1++){
coverage_bin_entry_t * this_ch_binn = x1 < r1_items?chrbin12[0]:chrbin12[1];
if(base_values) this_ch_binn[this_hit_locs[x1]*5 + this_hit_bases[x1]]++;
else this_ch_binn[this_hit_locs[x1]]++;
all_counted ++;
if(all_counted % 10000000 == 0)
SUBREADprintf("Processed %llu bases.\n", all_counted);
}
}
reads ++;
......
......@@ -437,23 +437,25 @@ unsigned int get_gene_linear(int chrono, int offset, const unsigned int offsets
return offset;
}
int locate_gene_position_max(unsigned int linear, const gene_offset_t* offsets , char ** chro_name, int * pos, int * head_cut_length, int * tail_cut_length, int rl)
{
int n = 0;
int total_offsets = offsets -> total_offsets;
int GENE_LOCATE_JUMP = total_offsets/3;
while (GENE_LOCATE_JUMP >3)
{
while(n+GENE_LOCATE_JUMP < total_offsets && offsets->read_offsets[n+GENE_LOCATE_JUMP] <= linear)
n+=GENE_LOCATE_JUMP;
GENE_LOCATE_JUMP /=3;
(*chro_name) = NULL;
(*pos) = -1;
int low_idx=0, high_idx= offsets->total_offsets;
while(1){
if(high_idx <= low_idx+1) {
n = max(low_idx-2,0);
break;
}
int mid_idx = (low_idx+high_idx)/2;
unsigned int mid_val = offsets->read_offsets[mid_idx];
if(mid_val > linear) high_idx = mid_idx;
else low_idx = mid_idx+1;
}
for (; offsets->read_offsets[n]; n++)
for (; n<offsets->total_offsets; n++)
{
if (offsets->read_offsets[n] > linear)
{
......@@ -470,7 +472,9 @@ int locate_gene_position_max(unsigned int linear, const gene_offset_t* offsets ,
if(tail_cut_length == NULL){
if(rl + linear > offsets->read_offsets[n] + 15 - offsets -> padding) return 1;
if(rl + linear > offsets->read_offsets[n] + 15 - offsets -> padding){
return 1;
}
} else {