New upstream version 1.5.3+dfsg

parent 5ca905c2
This diff is collapsed.
......@@ -36,10 +36,12 @@
#else
#ifndef __MINGW32__
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <net/if.h>
#endif
#include <unistd.h>
#include <netinet/in.h>
#endif
......@@ -768,7 +770,7 @@ int strcmp_number(char * s1, char * s2)
int mac_str(char * str_buff)
{
#ifdef FREEBSD
#if defined(FREEBSD) || defined(__MINGW32__)
return 1;
#else
#ifdef MACOS
......@@ -980,10 +982,10 @@ double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c,
}
int load_features_annotation(char * file_name, int file_type, char * gene_id_column, char * feature_name_column,
void * context, int do_add_feature(char * gene_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) ){
int load_features_annotation(char * file_name, int file_type, char * gene_id_column, char * transcript_id_column, char * used_feature_type,
void * context, int do_add_feature(char * gene_name, char * transcript_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) ){
char * file_line = malloc(MAX_LINE_LENGTH+1);
int lineno = 0, is_GFF_warned = 0, loaded_features = 0;
int lineno = 0, is_GFF_txid_warned = 0, is_GFF_geneid_warned = 0, loaded_features = 0;
FILE * fp = fopen(file_name, "r");
if(NULL == fp){
......@@ -992,9 +994,9 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
}
while(1){
int is_gene_id_found = 0, is_negative_strand = -1;
char * token_temp = NULL, * feature_name, * chro_name = NULL;
char feature_name_tmp[FEATURE_NAME_LENGTH];
int is_tx_id_found = 0, is_gene_id_found = 0, is_negative_strand = -1;
char * token_temp = NULL, * feature_name, *transcript_id = NULL, * chro_name = NULL;
char feature_name_tmp[FEATURE_NAME_LENGTH], txid_tmp[FEATURE_NAME_LENGTH];
feature_name = feature_name_tmp;
unsigned int start = 0, end = 0;
......@@ -1049,7 +1051,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
strtok_r(NULL,"\t", &token_temp);// source
char * feature_type = strtok_r(NULL,"\t", &token_temp);// feature_type
if(strcmp(feature_type, feature_name_column)==0){
if(strcmp(feature_type, used_feature_type)==0){
char * start_ptr = strtok_r(NULL,"\t", &token_temp);
char * end_ptr = strtok_r(NULL,"\t", &token_temp);
......@@ -1084,23 +1086,37 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
if(extra_attrs && (strlen(extra_attrs)>2)){
int attr_val_len = GTF_extra_column_value(extra_attrs , gene_id_column , feature_name_tmp, FEATURE_NAME_LENGTH);
if(attr_val_len>0) is_gene_id_found=1;
if(transcript_id_column){
transcript_id = txid_tmp;
attr_val_len = GTF_extra_column_value(extra_attrs , transcript_id_column , txid_tmp, FEATURE_NAME_LENGTH);
if(attr_val_len>0) is_tx_id_found=1;
else transcript_id = NULL;
}
}
if(!is_gene_id_found){
if(!is_GFF_warned)
{
if(!is_GFF_geneid_warned){
int ext_att_len = strlen(extra_attrs);
if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
SUBREADprintf("\nWarning: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s' \nThe attributes included in your GTF annotation are '%s' \n\n", gene_id_column, extra_attrs);
}
is_GFF_warned++;
is_GFF_geneid_warned++;
}
if(transcript_id_column && !is_tx_id_found){
if(!is_GFF_txid_warned){
int ext_att_len = strlen(extra_attrs);
if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
SUBREADprintf("\nWarning: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s' \nThe attributes included in your GTF annotation are '%s' \n\n", transcript_id_column, extra_attrs);
}
is_GFF_txid_warned++;
}
}
}
if(is_gene_id_found){
do_add_feature(feature_name, chro_name, start, end, is_negative_strand, context);
do_add_feature(feature_name, transcript_id, chro_name, start, end, is_negative_strand, context);
loaded_features++;
}
......
......@@ -71,8 +71,8 @@ unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar);
int mac_or_rand_str(char * char_14);
double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * frac_buffer, int buffer_size);
int load_features_annotation(char * file_name, int file_type, char * gene_id_column, char * feature_name_column,
void * context, int do_add_feature(char * gene_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) );
int load_features_annotation(char * file_name, int file_type, char * gene_id_column, char * transcript_id_column, char * used_feature_type,
void * context, int do_add_feature(char * gene_name, char * transcript_id, char * chrome_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) );
HashTable * load_alias_table(char * fname) ;
#endif
......@@ -3,7 +3,7 @@ include makefile.version
MACOS = -D FREEBSD
CCFLAGS = -march=native -mtune=core2 ${MACOS} -O9 -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\"
CCFLAGS = -march=native -mtune=core2 ${MACOS} -O9 -Wall -Wno-maybe-uninitialized -Wno-incompatible-pointer-types -Wno-array-bounds -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-result -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\"
LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE -l compat # -DREPORT_ALL_THE_BEST
CC = gcc ${CCFLAGS} -ggdb -fomit-frame-pointer -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
......@@ -14,20 +14,23 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
all: repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
mv repair coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation complete. #"
@echo "# Installation successfully complete. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
propmapped: propmapped.c ${ALL_OBJECTS}
${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -3,10 +3,10 @@
include makefile.version
OPT_LEVEL = 3
CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64
LDFLAGS = ${STATIC_MAKE} -lpthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 # -w
LDFLAGS = ${STATIC_MAKE} -pthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
CC_EXEC = gcc
CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb # -fomit-frame-pointer -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib
......@@ -14,20 +14,26 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
all: repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount propmapped qualityScores removeDup subread-fullscan ../bin/utilities
mv repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation complete. #"
@echo "# Installation successfully complete. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
txUnique: tx-unique.c tx-unique.h ${ALL_OBJECTS}
${CC} -o txUnique tx-unique.c ${ALL_OBJECTS} ${LDFLAGS}
globalReassembly: global-reassembly.c ${ALL_OBJECTS}
${CC} -o globalReassembly global-reassembly.c ${ALL_OBJECTS} ${LDFLAGS}
......@@ -67,16 +73,5 @@ subread-fullscan: fullscan.c ${ALL_OBJECTS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
#testZlib: test-seek-zlib.c ${ALL_OBJECTS}
# ${CC} -o testZlib test-seek-zlib.c ${ALL_OBJECTS} ${LDFLAGS}
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
#samMappedBases: samMappedBases.c ${ALL_OBJECTS}
# ${CC} -o samMappedBases samMappedBases.c ${ALL_OBJECTS} ${LDFLAGS}
#mergeVCF: mergeVCF.c ${ALL_OBJECTS}
# ${CC} -o mergeVCF mergeVCF.c ${ALL_OBJECTS} ${LDFLAGS}
clean:
rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS}
MACOS = -D MACOS
include makefile.version
CCFLAGS = -mtune=core2 ${MACOS} -O9 -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64
CCFLAGS = -mtune=core2 ${MACOS} -O9 -w -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64
LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
CC = gcc ${CCFLAGS} ${STATIC_MAKE} -ggdb -fomit-frame-pointer -O3 -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
......@@ -18,7 +18,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation complete. #"
@echo "# Installation successfully complete. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
......@@ -27,7 +27,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
propmapped: propmapped.c ${ALL_OBJECTS}
${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -1117,13 +1117,6 @@ int finalise_indel_and_junction_thread(global_context_t * global_context, thread
prev_env = this_event;
}
if(0){
for(xk1 = 0; xk1 < merge_target_items; xk1++){
chromosome_event_t * pev = merge_target + xk1;
printf("OCT27-MERGERES: %u~%u, indel=%d, nsup=%d, TYPE=%d\n",pev->event_small_side, pev->event_large_side, pev->indel_length, pev->supporting_reads, pev->event_type);
}
}
free(records);
if(thread_contexts)
......@@ -1238,7 +1231,7 @@ typedef struct {
HashTable * feature_sorting_table;
} do_load_juncs_context_t;
int do_juncs_add_feature(char * gene_name, char * chro_name, unsigned int feature_start, unsigned int feature_end, int is_negative_strand, void * context){
int do_juncs_add_feature(char * gene_name, char * transcript_id, char * chro_name, unsigned int feature_start, unsigned int feature_end, int is_negative_strand, void * context){
//#warning ">>>>>>> COMMENt NEXT <<<<<<<<<<<<<<"
//SUBREADprintf("INJ LOCS: %s : %u, %u\n", chro_name, feature_start, feature_end);
do_load_juncs_context_t * do_load_juncs_context = context;
......@@ -1336,8 +1329,7 @@ int load_known_junctions(global_context_t * global_context){
do_load_juncs_context.global_context = global_context;
do_load_juncs_context.feature_sorting_table = feature_sorting_table;
int features = load_features_annotation(global_context->config.exon_annotation_file , global_context->config.exon_annotation_file_type, global_context->config.exon_annotation_gene_id_column, global_context->config.exon_annotation_feature_name_column,
&do_load_juncs_context, do_juncs_add_feature);
int features = load_features_annotation(global_context->config.exon_annotation_file , global_context->config.exon_annotation_file_type, global_context->config.exon_annotation_gene_id_column, NULL, global_context->config.exon_annotation_feature_name_column, &do_load_juncs_context, do_juncs_add_feature);
feature_sorting_table -> appendix1 = global_context;
HashTableIteration(feature_sorting_table, add_annotation_to_junctions);
......@@ -1476,22 +1468,9 @@ int search_event(global_context_t * global_context, HashTable * event_table, chr
}
//#warning ">>>>>>>>>>>>>> COMMENT THIS <<<<<<<<<<<<<<<<<<<<<<<<<<<<"
if(0){
indel_context_t * indel_context = (indel_context_t *) global_context -> module_contexts[MODULE_INDEL_ID];
chromosome_event_t * est = indel_context -> event_space_dynamic;
if(est == event_space)
printf("OCT27-STEPRS-EVENT_HIT= %u ; HIT=%d\n", pos, xk2);
}
}else{
//#warning ">>>>>>>>>>>>>> COMMENT THIS <<<<<<<<<<<<<<<<<<<<<<<<<<<<"
if(0){
indel_context_t * indel_context = (indel_context_t *) global_context -> module_contexts[MODULE_INDEL_ID];
chromosome_event_t * est = indel_context -> event_space_dynamic;
if(est == event_space)
printf("OCT27-STEPRS-EVENT_HIT= %u ; HIT=0000\n", pos);
}
}
return ret;
......@@ -2275,7 +2254,7 @@ void print_indel_table(global_context_t * global_context){
for(xk1 = 0; xk1 < indel_context -> total_events ; xk1++){
chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
printf("OCT27-STEP-INTAB-TYPE-%d POS %u~%u GID=%u PV %d %d SUP %d / %d\n", event_body -> event_type, event_body -> event_small_side, event_body -> event_large_side, event_body -> global_event_id, event_body -> connected_next_event_distance, event_body -> connected_previous_event_distance , event_body -> supporting_reads , event_body -> anti_supporting_reads);
//printf("OCT27-STEP-INTAB-TYPE-%d POS %u~%u GID=%u PV %d %d SUP %d / %d\n", event_body -> event_type, event_body -> event_small_side, event_body -> event_large_side, event_body -> global_event_id, event_body -> connected_next_event_distance, event_body -> connected_previous_event_distance , event_body -> supporting_reads , event_body -> anti_supporting_reads);
}
int bucket;
......@@ -2288,7 +2267,7 @@ void print_indel_table(global_context_t * global_context){
int env_i;
for(env_i = 1; env_array[env_i]; env_i++){
chromosome_event_t * event_body = indel_context -> event_space_dynamic + (env_array[env_i]-1);
printf("OCT27-STEPQ-ENTAB-%u [%d] to %u ~ %u len=%d VAL=%d PTR=%p\n",entry_pos, env_i, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, env_array[env_i], env_array);
// printf("OCT27-STEPQ-ENTAB-%u [%d] to %u ~ %u len=%d VAL=%d PTR=%p\n",entry_pos, env_i, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, env_array[env_i], env_array);
}
cursor = cursor->next;
......@@ -2327,8 +2306,8 @@ int write_indel_final_results(global_context_t * global_context)
chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
//#warning " ================= REMOVE '- 1' from the next LINE!!! ========================="
if((event_body -> event_type != CHRO_EVENT_TYPE_INDEL && event_body->event_type != CHRO_EVENT_TYPE_LONG_INDEL && event_body -> event_type != CHRO_EVENT_TYPE_POTENTIAL_INDEL)|| (event_body -> final_counted_reads < 1 && event_body -> event_type == CHRO_EVENT_TYPE_INDEL) )
//#warning " ================= REMOVE '- 1' from the next LINE!!! ========================="
if((event_body -> event_type != CHRO_EVENT_TYPE_INDEL && event_body->event_type != CHRO_EVENT_TYPE_LONG_INDEL && event_body -> event_type != CHRO_EVENT_TYPE_POTENTIAL_INDEL)|| (event_body -> final_counted_reads < 1 && event_body -> event_type == CHRO_EVENT_TYPE_INDEL) )
continue;
//assert((-event_body -> indel_length) < MAX_INSERTION_LENGTH);
......@@ -4451,6 +4430,7 @@ void init_global_context(global_context_t * context)
context->config.do_fusion_detection = 0;
context->config.do_structural_variance_detection = 0;
context->config.more_accurate_fusions = 1;
context->config.report_multi_mapping_reads = 0;
//#warning "============= best values for the SVs application: 8; 5; 32 ==============="
context->config.top_scores = 8 - 5;
......@@ -4471,7 +4451,6 @@ void init_global_context(global_context_t * context)
context->will_remove_input_file = 0;
context->config.ignore_unmapped_reads = 0;
context->config.report_unmapped_using_mate_pos = 1;
context->config.report_multi_mapping_reads = 1;
context->config.downscale_mapping_quality=0;
context->config.ambiguous_mapping_tolerance = 39;
context->config.use_hamming_distance_break_ties = 0;
......
......@@ -59,6 +59,7 @@ static struct option long_options[] =
{"complexIndels", no_argument, 0, 0},
{"minVoteCutoff", required_argument, 0, 0},
{"maxRealignLocations", required_argument, 0, 0},
{"multiMapping", no_argument, 0, 0},
{0, 0, 0, 0}
};
......@@ -68,15 +69,15 @@ void print_usage_core_aligner()
SUBREADprintf("\nVersion %s\n\n", SUBREAD_VERSION);
SUBREADputs("Usage:");
SUBREADputs("");
SUBREADputs("./subread-align [options] -i <index_name> -r <input> -t <type> -o <output>");
SUBREADputs("./subread-align [options] -i <index_name> -r <input> -t <type> -o <output>");
SUBREADputs("");
SUBREADputs("## Mandatory arguments:");
SUBREADputs(" ");
SUBREADputs(" -i <string> Base name of the index.");
SUBREADputs("");
SUBREADputs(" -r <string> Name of an input read file. If paired-end, this should be");
SUBREADputs(" the first read file (typically containing ‘R1’ in the file");
SUBREADputs(" name) and the second should be provided via ‘-R’.");
SUBREADputs(" the first read file (typically containing \"R1\"in the file");
SUBREADputs(" name) and the second should be provided via \"-R\".");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ and FASTA.");
SUBREADputs(" These formats are identified automatically.");
SUBREADputs(" ");
......@@ -92,15 +93,15 @@ void print_usage_core_aligner()
SUBREADputs(" STDOUT.");
SUBREADputs("");
SUBREADputs(" -R <string> Name of the second read file in paired-end data (typically");
SUBREADputs(" containing ‘R2’ in the file name).");
SUBREADputs(" containing \"R2\" the file name).");
SUBREADputs("");
SUBREADputs(" --SAMinput Input reads are in SAM format.");
SUBREADputs("");
SUBREADputs(" --BAMinput Input reads are in BAM format.");
SUBREADputs("");
SUBREADputs(" --SAMoutput Save mapping results in SAM format.");
SUBREADputs(" --SAMoutput Save mapping results in SAM format.");
SUBREADputs("");
SUBREADputs("# offset value added to Phred quality scores of read bases");
SUBREADputs("# Phred offset");
SUBREADputs("");
SUBREADputs(" -P <3:6> Offset value added to the Phred quality score of each read");
SUBREADputs(" base. '3' for phred+33 and '6' for phred+64. '3' by default.");
......@@ -124,14 +125,13 @@ void print_usage_core_aligner()
SUBREADputs("");
SUBREADputs("# unique mapping and multi-mapping");
SUBREADputs("");
SUBREADputs(" -u Report uniquely mapped reads only. Number of matched bases (");
SUBREADputs(" for RNA-seq) or mis-matched bases(for genomic DNA-seq) is");
SUBREADputs(" used to break the tie when multiple mapping locations are");
SUBREADputs(" found.");
SUBREADputs(" --multiMapping Report multi-mapping reads in addition to uniquely mapped");
SUBREADputs(" reads. Use \"-B\" to set the maximum number of equally-best");
SUBREADputs(" alignments to be reported.");
SUBREADputs("");
SUBREADputs(" -B <int> Maximal number of equally-best mapping locations to be");
SUBREADputs(" reported. 1 by default. Note that -u option takes precedence");
SUBREADputs(" over -B.");
SUBREADputs(" -B <int> Maximum number of equally-best alignments to be reported for");
SUBREADputs(" a multi-mapping read. Equally-best alignments have the same");
SUBREADputs(" number of mis-matched bases. 1 by default.");
SUBREADputs("");
SUBREADputs("# indel detection");
SUBREADputs("");
......@@ -317,6 +317,7 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.max_vote_combinations = max(global_context->config.max_vote_combinations, global_context->config.reported_multi_best_reads + 1);
global_context->config.max_vote_simples = max(global_context->config.max_vote_simples, global_context->config.reported_multi_best_reads + 1);
global_context->config.report_multi_mapping_reads = 1;
break;
case 'H':
global_context->config.use_hamming_distance_break_ties = 1;
......@@ -347,10 +348,6 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
case 'U':
global_context->config.report_no_unpaired_reads = 1;
break;
case 'u':
global_context->config.report_multi_mapping_reads = 0;
global_context->config.use_hamming_distance_break_ties = 1;
break;
case 'b':
global_context->config.convert_color_to_base = 1;
break;
......@@ -466,7 +463,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
break;
case 0:
if(strcmp("memoryMultiplex", long_options[option_index].name)==0)
if(strcmp("multiMapping", long_options[option_index].name)==0)
{
global_context->config.report_multi_mapping_reads = 1;
}
else if(strcmp("memoryMultiplex", long_options[option_index].name)==0)
{
global_context->config.memory_use_multiplex = atof(optarg);
}
......
......@@ -66,6 +66,7 @@ static struct option long_options[] =
{"minVoteCutoff", required_argument, 0, 0},
{"minMappedFraction", required_argument, 0, 0},
{"complexIndels", no_argument, 0, 0},
{"multiMapping", no_argument, 0, 0},
{0, 0, 0, 0}
};
......@@ -81,10 +82,11 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs(" -i <index> Base name of the index.");
SUBREADputs("");
SUBREADputs(" -r <string> Name of the input file. Input formats including gzipped");
SUBREADputs(" fastq, fastq, and fasta can be automatically detected. If");
SUBREADputs(" paired-end, this should give the name of file including");
SUBREADputs(" first reads.");
SUBREADputs(" -r <string> Name of an input read file. If paired-end, this should be");
SUBREADputs(" the first read file (typically containing \"R1\"in the file");
SUBREADputs(" name) and the second should be provided via \"-R\".");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ and FASTA.");
SUBREADputs(" These formats are identified automatically.");
SUBREADputs("");
SUBREADputs("## Optional arguments:");
SUBREADputs("# input reads and output");
......@@ -94,15 +96,15 @@ void print_usage_core_subjunc()
SUBREADputs(" STDOUT.");
SUBREADputs("");
SUBREADputs(" -R <string> Name of the second read file in paired-end data (typically");
SUBREADputs(" containing ‘R2’ in the file name).");
SUBREADputs(" containing \"R2\" the file name).");
SUBREADputs("");
SUBREADputs(" --SAMinput Input reads are in SAM format.");
SUBREADputs("");
SUBREADputs(" --BAMinput Input reads are in BAM format.");
SUBREADputs("");
SUBREADputs(" --SAMoutput Save mapping results in SAM format.");
SUBREADputs(" --SAMoutput Save mapping results in SAM format.");
SUBREADputs("");
SUBREADputs("# offset value added to Phred quality scores of read bases");
SUBREADputs("# Phred offset");
SUBREADputs("");
SUBREADputs(" -P <3:6> Offset value added to the Phred quality score of each read");
SUBREADputs(" base. '3' for phred+33 and '6' for phred+64. '3' by default.");
......@@ -126,14 +128,13 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs("# unique mapping and multi-mapping");
SUBREADputs("");
SUBREADputs(" -u Report uniquely mapped reads only. Number of matched bases (");
SUBREADputs(" for RNA-seq) or mis-matched bases(for genomic DNA-seq) is");
SUBREADputs(" used to break the tie when multiple mapping locations are");
SUBREADputs(" found.");
SUBREADputs(" --multiMapping Report multi-mapping reads in addition to uniquely mapped");
SUBREADputs(" reads. Use \"-B\" to set the maximum number of equally-best");
SUBREADputs(" alignments to be reported.");
SUBREADputs("");
SUBREADputs(" -B <int> Maximal number of equally-best mapping locations to be");
SUBREADputs(" reported. 1 by default. Note that -u option takes precedence");
SUBREADputs(" over -B.");
SUBREADputs(" -B <int> Maximum number of equally-best alignments to be reported for");
SUBREADputs(" a multi-mapping read. Equally-best alignments have the same");
SUBREADputs(" number of mis-matched bases. 1 by default.");
SUBREADputs("");
SUBREADputs("# indel detection");
SUBREADputs("");
......@@ -353,10 +354,6 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
case 'U':
global_context->config.report_no_unpaired_reads = 1;
break;
case 'u':
global_context->config.report_multi_mapping_reads = 0;
global_context->config.use_hamming_distance_break_ties = 1;
break;
case 'b':
global_context->config.convert_color_to_base = 1;
break;
......@@ -475,7 +472,11 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
break;
case 0:
if(strcmp("memoryMultiplex", long_options[option_index].name)==0)
if(strcmp("multiMapping", long_options[option_index].name)==0)
{
global_context->config.report_multi_mapping_reads = 1;
}
else if(strcmp("memoryMultiplex", long_options[option_index].name)==0)
{
global_context->config.memory_use_multiplex = atof(optarg);
}
......
......@@ -202,7 +202,7 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
SUBREADprintf("F_JUMP? match=%d / tested=%d\n", matched_bases_to_site , tested_read_pos);
//#warning "========= remove - 2000 from next line ============="
if(tested_read_pos >0 && ( matched_bases_to_site*10000/tested_read_pos > 9000 - 2000 || global_context->config.maximise_sensitivity_indel) )
if(explain_context -> total_tries < REALIGN_TOTAL_TRIES && tested_read_pos >0 && ( matched_bases_to_site*10000/tested_read_pos > 9000 - 2000 || global_context->config.maximise_sensitivity_indel) )
for(xk1 = 0; xk1 < site_events_no ; xk1++)
{
chromosome_event_t * tested_event = site_events[xk1];
......@@ -275,16 +275,10 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
explain_context -> tmp_search_sections ++;
if(0 && FIXLENstrcmp("R000404427", explain_context -> read_name) == 0)
SUBREADprintf("FRONT_ADD_EVENT : %s , %u ~ %u , INDELLEN=%d, TEST_READ_POS=%u, RPED=%u, ABSSTART=%u\n", explain_context -> read_name, tested_event -> event_small_side, tested_event -> event_large_side, tested_event -> indel_length, tested_read_pos, explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_end, new_read_head_abs_offset);
//if(explain_context -> pair_number == 23){
//printf("JUMP_IN: %u ; STRAND=%c ; REMENDER=%d ; 0=%d 0=%d\n", new_read_head_abs_offset, tested_event -> is_strand_jumped?'X':'=', new_remainder_len, tested_event -> indel_length, tested_event -> indel_at_junction);
//}
if(0 && FIXLENstrcmp("R000404427", explain_context -> read_name) == 0)
SUBREADprintf("FRONT_ADD_EVENT : %s , %u ~ %u , INDELLEN=%d, TEST_READ_POS=%u, RPED=%u, ABSSTART=%u\n", explain_context -> read_name, tested_event -> event_small_side, tested_event -> event_large_side, tested_event -> indel_length, tested_read_pos, explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_end, new_read_head_abs_offset);
// #warning "SUBREAD_151 REMOVE THIS ASSERTION! "
// assert(new_remainder_len < 102);
//printf("SUGGEST_NEXT = %d (! %d)\n", tested_event -> connected_next_event_distance, tested_event -> connected_previous_event_distance);
explain_context -> total_tries ++;
search_events_to_front(global_context, thread_context, explain_context, read_text + tested_event -> indel_at_junction + tested_read_pos - min(0, tested_event->indel_length), qual_text + tested_read_pos - min(0, tested_event->indel_length), new_read_head_abs_offset, new_remainder_len, sofar_matched + matched_bases_to_site - jump_penalty, tested_event -> connected_next_event_distance, 0);
explain_context -> tmp_search_sections --;
......@@ -536,7 +530,7 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
//#warning ">>>>>>>>>>>>>>>> REMOVE IT <<<<<<<<<<<<<<<<<<<<<<"
//printf("OCT27-STEPSB-JB-%s: test %u = %d events; TEST=%d > 7000 : MA=%d; %s ; %u = %u - (%d - %d) ; LEV=%d\n", explain_context -> read_name, potential_event_pos, site_events_no, (read_tail_pos<=tested_read_pos)?(-1234):( matched_bases_to_site*10000/(read_tail_pos - tested_read_pos)) , matched_bases_to_site, read_text + tested_read_pos, potential_event_pos, read_tail_abs_offset, read_tail_pos, tested_read_pos, explain_context -> tmp_search_sections);
//#warning "========= remove - 2000 from next line ============="
if((read_tail_pos>tested_read_pos) && ( matched_bases_to_site*10000/(read_tail_pos - tested_read_pos) > 9000 - 2000 || global_context->config.maximise_sensitivity_indel) )
if(explain_context -> total_tries < REALIGN_TOTAL_TRIES && (read_tail_pos>tested_read_pos) && ( matched_bases_to_site*10000/(read_tail_pos - tested_read_pos) > 9000 - 2000 || global_context->config.maximise_sensitivity_indel) )
for(xk1 = 0; xk1 < site_events_no ; xk1++)
{
chromosome_event_t * tested_event = site_events[xk1];
......@@ -617,6 +611,7 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
//#warning ">>>>>>>>>>>>>>>> REMOVE IT <<<<<<<<<<<<<<<<<<<<<<"
//printf("OCT27-STEPSB-JB-%s: %u IN -> %u; NEW_TAIL=%d; ENV_CONN=%d; LEV=%d\n", explain_context -> read_name, potential_event_pos, new_read_tail_abs_offset, new_read_tail_pos, tested_event -> connected_previous_event_distance, explain_context -> tmp_search_sections);
explain_context -> total_tries ++;
search_events_to_back(global_context, thread_context, explain_context, read_text , qual_text, new_read_tail_abs_offset , new_read_tail_pos, sofar_matched + matched_bases_to_site - jump_penalty, tested_event -> connected_previous_event_distance, 0);
//#warning ">>>>>>>>>>>>>>>> REMOVE IT <<<<<<<<<<<<<<<<<<<<<<"
//printf("OCT27-STEPSB-JB-%s: %u OUT <- %u; LEN=%d\n", explain_context -> read_name, potential_event_pos, new_read_tail_abs_offset, explain_context -> tmp_search_sections);
......@@ -2542,6 +2537,7 @@ unsigned int explain_read(global_context_t * global_context, thread_context_t *
explain_context.pair_number = pair_number;
explain_context.is_second_read = is_second_read ;
explain_context.best_read_id = best_read_id;
explain_context.total_tries = 0;
unsigned int back_search_tail_position,front_search_start_position;
unsigned short back_search_read_tail, front_search_read_start;
......@@ -2582,9 +2578,11 @@ unsigned int explain_read(global_context_t * global_context, thread_context_t *
front_search_start_position = current_result -> selected_position + front_search_read_start;
}
if(0 && FIXLENstrcmp( explain_context.read_name, "R000002689")==0)
{
SUBREADprintf("EXPLAIN_READ_%d %s [%d]: POS=%u ;; BACK SEARCH TAILPOS=%u, READTAIL=%d ; INDEL_IN_CONF=%d ; READ_COV=%d~%d\n", 1+is_second_read, explain_context.read_name, best_read_id, current_result -> selected_position, back_search_tail_position, back_search_read_tail, current_result -> indels_in_confident_coverage, front_search_read_start, back_search_read_tail);
if(0 && FIXLENstrcmp("SRR3439488.572382", explain_context.read_name)==0) {
char * q_res_chro=NULL;
int q_res_offset=0;
locate_gene_position(current_result -> selected_position,&global_context -> chromosome_table, &q_res_chro, &q_res_offset);
SUBREADprintf("EXPLAIN_READ_%d %s [%d]: POS=%u (%s:%d);; BACK SEARCH TAILPOS=%u, READTAIL=%d ; INDEL_IN_CONF=%d ; READ_COV=%d~%d\n", 1+is_second_read, explain_context.read_name, best_read_id, current_result -> selected_position,q_res_chro,q_res_offset, back_search_tail_position, back_search_read_tail, current_result -> indels_in_confident_coverage, front_search_read_start, back_search_read_tail);
}
search_events_to_back(global_context, thread_context, &explain_context, read_text , qual_text, back_search_tail_position , back_search_read_tail, 0, 0, 1);
......@@ -2683,6 +2681,9 @@ unsigned int explain_read(global_context_t * global_context, thread_context_t *
else*/
int realignment_number = finalise_explain_CIGAR(global_context, thread_context, &explain_context, final_realignments);
if(0 && FIXLENstrcmp("SRR3439488.572382", explain_context.read_name)==0)
SUBREADprintf("TRYING_REALIGN:%s:%u\n", explain_context.read_name, explain_context.total_tries);
return realignment_number;
}
......
......@@ -23,6 +23,8 @@
#include "hashtable.h"
#include "core.h"
#define REALIGN_TOTAL_TRIES 50
#define FUNKY_FRAGMENT_A 1 // same strand and gapped (0<gap<tra_len)
#define FUNKY_FRAGMENT_BC 2 // very far far away (>=tra_len) or chimeric.
#define FUNKY_FRAGMENT_DE 4 // tlen < tra_len and strand jumpped
......@@ -67,6 +69,7 @@ typedef struct{
int all_back_alignments;
int all_front_alignments;
int known_junctions;
unsigned int total_tries;
// unsigned int tmp_jump_length;
// unsigned int best_jump_length;
......
This diff is collapsed.
......@@ -459,8 +459,17 @@ typedef struct{
int output_buffer_item;
int output_buffer_pointer;
int is_finished;
unsigned int all_mapped_reads;
subread_lock_t output_lock;
unsigned int all_mapped_reads;
unsigned int not_properly_pairs_wrong_arrangement;
unsigned int not_properly_pairs_different_chro;
unsigned int not_properly_different_strands;
unsigned int not_properly_pairs_TLEN_wrong;
unsigned int all_unmapped_reads;
unsigned int not_properly_pairs_only_one_end_mapped;
unsigned int all_multimapping_reads;
unsigned int all_uniquely_mapped_reads;
} thread_context_t;
......@@ -516,12 +525,21 @@ typedef struct{
double timecost_for_realign;
unsigned long long all_processed_reads;
unsigned long long all_mapped_reads;
unsigned long long all_correct_PE_reads;
unsigned int all_junctions;
unsigned int all_fusions;
unsigned int all_indels;
unsigned int all_mapped_reads;
unsigned int not_properly_pairs_wrong_arrangement;
unsigned int not_properly_pairs_different_chro;
unsigned int not_properly_different_strands;
unsigned int not_properly_pairs_TLEN_wrong;
unsigned int all_unmapped_reads;
unsigned int not_properly_pairs_only_one_end_mapped;
unsigned int all_multimapping_reads;
unsigned int all_uniquely_mapped_reads;
unsigned long long current_circle_start_abs_offset_file1;
gene_inputfile_position_t current_circle_start_position_file1;
gene_inputfile_position_t current_circle_start_position_file2;
......@@ -547,6 +565,7 @@ typedef struct{
subread_read_number_t read_block_start;
char * exonic_region_bitmap;
HashTable * sam_chro_to_anno_chr_alias;
HashTable * annotation_chro_table;
} global_context_t;
......
......@@ -6,7 +6,7 @@
* Released to the public domain.
*
*--------------------------------------------------------------------------
* $Id: hashtable.c,v 9999.14 2017/03/10 00:01:40 cvs Exp $
* $Id: hashtable.c,v 9999.17 2017/04/28 06:30:27 cvs Exp $
\*--------------------------------------------------------------------------*/
#include <stdio.h>
......@@ -15,7 +15,12 @@
#include <assert.h>
#include <pthread.h>
#include "hashtable.h"
#include "core.h"
static int pointercmp(const void *pointer1, const void *pointer2);
static unsigned long pointerHashFunction(const void *pointer);
static int isProbablePrime(long number);
static long calculateIdealNumOfBuckets(HashTable *hashTable);
ArrayList * ArrayListCreate(int init_capacity){
......@@ -43,8 +48,12 @@ void * ArrayListGet(ArrayList * list, long n){
int ArrayListPush(ArrayList * list, void * new_elem){
if(list -> capacityOfElements <= list->numOfElements){
list -> capacityOfElements *=1.3;
list -> elementList=realloc(list -> elementList, list -> capacityOfElements);
if(list -> capacityOfElements *1.3 > list -> capacityOfElements + 10)
list -> capacityOfElements = list -> capacityOfElements *1.3;
else list -> capacityOfElements = list -> capacityOfElements + 10;
list -> elementList=realloc(list -> elementList, sizeof(void *) * list -> capacityOfElements);
assert(list -> elementList);
}
list->elementList[list->numOfElements++] = new_elem;
return list->numOfElements;
......@@ -53,12 +62,53 @@ void ArrayListSetDeallocationFunction(ArrayList * list, void (*elem_deallocator
list -> elemDeallocator = elem_deallocator;
}
int ArrayListSort_compare(void * sortdata0, int L, int R){
void ** sortdata = sortdata0;
ArrayList * list = sortdata[0];
int (*comp_elems)(void * L_elem, void * R_elem) = sortdata[1];
static int pointercmp(const void *pointer1, const void *pointer2);
static unsigned long pointerHashFunction(const void *pointer);
static int isProbablePrime(long number);
static long calculateIdealNumOfBuckets(HashTable *hashTable);
void * L_elem = list -> elementList[L];