New upstream version 1.6.1+dfsg

parent e99d951c
This diff is collapsed.
......@@ -47,6 +47,7 @@
#include "subread.h"
#include "input-files.h"
#include "seek-zlib.h"
#include "gene-algorithms.h"
#include "HelperFunctions.h"
......@@ -351,7 +352,7 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
int cigar_cursor=0;
unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
unsigned int chromosome_cursor=first_pos;
int ret=0;
int ret=0, is_first_S = 1;
for(cigar_cursor=0; ; cigar_cursor++)
{
......@@ -363,8 +364,10 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
}
else
{
if(ch == 'S')
if(ch == 'S'){
if(is_first_S) current_section_start_read_pos = tmp_int;
read_cursor += tmp_int;
}
else if(ch == 'M' || ch == 'X' || ch == '=') {
read_cursor += tmp_int;
current_section_chro_len += tmp_int;
......@@ -391,6 +394,7 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
}
//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, current_section_chro_len);
tmp_int = 0;
is_first_S = 0;
}
if(cigar_cursor>100) return -1;
}
......@@ -880,10 +884,10 @@ int rand_str(char * str_buff){
}
int mathrand_str(char * str_buff){
srand((int)(miltime()*100));
myrand_srand((int)(miltime()*100));
int x1;
for(x1 = 0; x1 < 6; x1++){
sprintf(str_buff+2*x1, "%02X", rand() & 0xff );
sprintf(str_buff+2*x1, "%02X", myrand_rand() & 0xff );
}
return 0;
}
......@@ -986,9 +990,10 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
void * context, int do_add_feature(char * gene_name, char * transcript_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) ){
char * file_line = malloc(MAX_LINE_LENGTH+1);
int lineno = 0, is_GFF_txid_warned = 0, is_GFF_geneid_warned = 0, loaded_features = 0;
FILE * fp = fopen(file_name, "r");
autozip_fp afp;
int aret = autozip_open(file_name, &afp);
if(NULL == fp){
if(aret < 0){
SUBREADprintf("Error: unable to open the annotation file : %s\n", file_name);
return -1;
}
......@@ -1000,8 +1005,8 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
feature_name = feature_name_tmp;
unsigned int start = 0, end = 0;
char * getres = fgets(file_line, MAX_LINE_LENGTH, fp);
if(getres == NULL) break;
aret = autozip_gets(&afp, file_line, MAX_LINE_LENGTH);
if(aret < 1) break;
lineno++;
if(is_comment_line(file_line, file_type, lineno-1))continue;
......@@ -1121,7 +1126,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
}
}
fclose(fp);
autozip_close(&afp);
free(file_line);
return loaded_features;
}
......
......@@ -16,7 +16,8 @@ ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
mkdir -p ../bin/utilities
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
......@@ -28,10 +29,10 @@ all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D FREEBSD " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D FREEBSD " > longread-one/make.version
rm -f longread-one/*.o
cd longread-one && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -14,10 +14,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -28,10 +29,13 @@ all: sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo " " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
echo " " > longread-one/make.version
rm -f longread-one/*.o
cd longread-one && $(MAKE)
detectionCall: detection-calls.c ${ALL_OBJECTS}
${CC} -o detectionCall detection-calls.c ${ALL_OBJECTS} ${LDFLAGS}
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -13,7 +13,8 @@ ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # globalReassembly testZlib
mkdir -p ../bin/utilities
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped ../bin/utilities
@echo
@echo "###########################################################"
......@@ -25,10 +26,10 @@ all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D MACOS " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D MACOS " > longread-one/make.version
rm -f longread-one/*.o
cd longread-one && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -96,6 +96,7 @@ struct SNP_Calling_Parameters{
unsigned int real_read_count;
unsigned int reported_SNPs;
unsigned int reported_indels;
int supporting_read_excessed_reported;
};
#define PRECALCULATE_FACTORIAL 2000000
......@@ -212,17 +213,16 @@ int is_snp_bitmap(char * bm, unsigned int pos)
return bm [bytes] & (1 << bits);
}
void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, struct SNP_Calling_Parameters * parameters)
void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, struct SNP_Calling_Parameters * parameters, char * chro, int block_start )
{
int bucket;
KeyValuePair * cursor;
for(bucket=0; bucket< merge_table -> numOfBuckets; bucket++)
{
for(bucket=0; bucket< merge_table -> numOfBuckets; bucket++) {
cursor = merge_table -> bucketArray[bucket];
while (1)
{
while (1) {
if (!cursor) break;
int base_N_qual = cursor->value - NULL;
int POI_pos = cursor->key - NULL - 100;
......@@ -232,20 +232,20 @@ void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, s
int j;
unsigned int supporting_bases = 0;
for(j=0; j<4; j++)
{
for(j=0; j<4; j++) {
supporting_bases += snp_voting_piles[POI_pos*4+j];
}
if(supporting_bases < parameters->max_supporting_read_number)
{
if(qual1 < (parameters -> is_phred_64?'@':'!')+parameters->min_phred_score) // low quality bases
{
if(supporting_bases < parameters->max_supporting_read_number) {
if(qual1 < (parameters -> is_phred_64?'@':'!')+parameters->min_phred_score){ // low quality bases
// the low-seq-quality bases are not inclued in voting.
}
else
{
snp_voting_piles[POI_pos*4+base1]+=1;
} else snp_voting_piles[POI_pos*4+base1]+=1;
}else{
if(parameters -> supporting_read_excessed_reported < 100){
parameters -> supporting_read_excessed_reported++;
SUBREADprintf("Warning: the depth exceeded the limit of %d at %s : %d\n", parameters->max_supporting_read_number, chro , block_start + POI_pos);
if(parameters -> supporting_read_excessed_reported == 100)
SUBREADprintf("Too many warnings.\nNo more warning messages are reported.\n");
}
}
......@@ -254,7 +254,7 @@ void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, s
}
}
int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, char ** SNP_bitmap_recorder, unsigned int * snp_voting_piles, int block_no, unsigned int reference_len, char * referenced_genome)
int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, char ** SNP_bitmap_recorder, unsigned int * snp_voting_piles, int block_no, unsigned int reference_len, char * referenced_genome, char * chro_name, unsigned int offset)
{
int last_read_id=-1,i;
HashTable * merge_table = HashTableCreate(1000);
......@@ -314,7 +314,7 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
if(parameters->is_paired_end_data){
if( (last_read_id >>1) != (read_rec.read_number>>1) && last_read_id>=0 && merge_table -> numOfElements > 0)
{
put_hash_to_pile(merge_table, snp_voting_piles, parameters);
put_hash_to_pile(merge_table, snp_voting_piles, parameters,chro_name, offset);
HashTableDestroy(merge_table);
merge_table = HashTableCreate(1000);
}
......@@ -403,6 +403,13 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
{
snp_voting_piles[(first_base_pos+i-1)*4+base_int]+=1;
}
}else{
if(parameters -> supporting_read_excessed_reported < 100){
parameters -> supporting_read_excessed_reported++;
SUBREADprintf("Warning: the depth exceeded the limit of %d at %s : %d\n", parameters->max_supporting_read_number, chro_name , offset + first_base_pos+i);
if(parameters -> supporting_read_excessed_reported == 100)
SUBREADprintf("Too many warnings.\nNo more warning messages are reported.\n");
}
}
}
}
......@@ -411,7 +418,7 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
}
if(parameters->is_paired_end_data && merge_table -> numOfElements > 0)
put_hash_to_pile(merge_table, snp_voting_piles, parameters);
put_hash_to_pile(merge_table, snp_voting_piles, parameters, chro_name, offset);
HashTableDestroy(merge_table);
......@@ -637,7 +644,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
pcutoff_list[i]=-1.;
}
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome);
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome, chro_name, offset);
fclose(tmp_fp);
if (parameters -> delete_piles)
......@@ -659,7 +666,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
{
int min_phred_score_raw = parameters -> min_phred_score;
parameters -> min_phred_score -= 3;
read_tmp_block(parameters, tmp_fp,NULL , snp_BGC_piles,block_no, reference_len, referenced_genome);
read_tmp_block(parameters, tmp_fp,NULL , snp_BGC_piles,block_no, reference_len, referenced_genome, chro_name, offset);
parameters -> min_phred_score = min_phred_score_raw;
}
fclose(tmp_fp);
......@@ -1509,9 +1516,10 @@ void print_usage_snp(char * myname)
SUBREADputs("Optional arguments:");
SUBREADputs("");
SUBREADputs(" -a <file> Provide a set of annotated SNPs (e.g. SNPs included in the dbSNP");
SUBREADputs(" database). The supplied file should be in VCF format. Providing");
SUBREADputs(" known SNPs to the program should improve its SNP calling");
SUBREADputs(" performance. Note that the provided SNPs may or may not be called.");
SUBREADputs(" database). The supplied file should be in VCF format (gzipped file");
SUBREADputs(" is accepted). Providing known SNPs to the program should improve");
SUBREADputs(" its SNP calling performance. Note that the provided SNPs may or");
SUBREADputs(" may not be called.");
SUBREADputs("");
SUBREADputs(" -b Indicate the input file provided via -i is in BAM format.");
SUBREADputs("");
......@@ -1529,10 +1537,9 @@ void print_usage_snp(char * myname)
SUBREADputs(" -r <int> Specify the minimum number of mapped reads a SNP-containing");
SUBREADputs(" location must have (ie. the minimum coverage). 1 by default.");
SUBREADputs("");
SUBREADputs(" -x <int> Specify the maximum number of mapped reads a SNP-containing");
SUBREADputs(" location have have. 1000 by default. Any location having more than");
SUBREADputs(" the threshold number of reads will not be considered for SNP");
SUBREADputs(" calling. This option is useful for removing PCR artefacts.");
SUBREADputs(" -x <int> Specify the maximum depth a SNP location is allowed to have.");
SUBREADputs(" 1,000,000 reads by default. This option is useful for removing PCR");
SUBREADputs(" artefacts.");
SUBREADputs("");
SUBREADputs(" -s <int> Specify the minimum base quality scores (Phred scores) read bases");
SUBREADputs(" must satisfy to be used for SNP calling. 13 by default. Read bases");
......@@ -1604,7 +1611,7 @@ int main_snp_calling_test(int argc,char ** argv)
parameters.supporting_read_rate = 0.;
parameters.min_supporting_read_number = 1;
parameters.min_alternative_read_number = 1;
parameters.max_supporting_read_number = 1000;
parameters.max_supporting_read_number = 1000000;
parameters.neighbour_filter_testlen = -1;
parameters.neighbour_filter_rate = 0.000000001;
parameters.min_phred_score = 13;
......
......@@ -326,7 +326,7 @@ int main(int argc, char ** argv)
struct timeval xtime;
gettimeofday(&xtime,NULL);
srand(time(NULL)^xtime.tv_usec);
myrand_srand(time(NULL)^xtime.tv_usec);
......
......@@ -366,7 +366,7 @@ int transfer_SAM_to_position_table(char * sam_file)
int is_paired_end_reads = 0;
unsigned long long int file_offset = ftello(input_file.input_fp);
if(feof(input_file.input_fp))break;
if(feof((FILE *)input_file.input_fp))break;
read_text[0]=0;
qual_text[0]=0;
......
......@@ -406,7 +406,7 @@ void remove_sorted_neighbours(global_context_t * global_context)
merge_sort(sort_data, indel_context->total_events, event_neighbour_sort_compare, event_neighbour_sort_exchange, event_neighbour_sort_merge);
int xk2, position_delta, maxinum_removed_events = global_context-> config.do_fusion_detection? 9999999:1999999;
int xk2, position_delta, maxinum_removed_events =(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)? 9999999:1999999;
position_delta = 10;
int * to_be_removed_ids = malloc(sizeof(int) * (1+maxinum_removed_events)), to_be_removed_number=0;
......@@ -477,7 +477,7 @@ void remove_neighbour(global_context_t * global_context)
int xk1;
int * to_be_removed_ids;
int to_be_removed_number = 0, all_junctions = 0;
int maxinum_removed_events = global_context-> config.do_fusion_detection? 9999999:1999999;
int maxinum_removed_events =(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)? 9999999:1999999;
to_be_removed_ids = malloc(sizeof(int) * (1+maxinum_removed_events));
......@@ -564,7 +564,7 @@ void remove_neighbour(global_context_t * global_context)
}
}
if(1 && global_context->config.do_fusion_detection && event_body -> event_type == CHRO_EVENT_TYPE_FUSION)
if((1 && global_context-> config.do_fusion_detection || 1 && global_context-> config.do_long_del_detection) && event_body -> event_type == CHRO_EVENT_TYPE_FUSION)
{
for(xk2=-10 ; xk2 < 10 ; xk2++)
{
......@@ -1913,9 +1913,11 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
dyna_steps = core_dynamic_align(global_context, thread_context, read_text + last_correct_base, first_correct_base - last_correct_base, voting_position + last_correct_base + last_indel, movement_buffer, indels, read_name);
movement_buffer[dyna_steps]=0;
if(0 && FIXLENstrcmp("simulated.2467286", read_name) == 0){
SUBREADprintf("DYNAMIC: indels=%d, %s\n", indels, movement_buffer);
}
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0) {
......@@ -1943,7 +1945,6 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
int mv=movement_buffer[x1];
if(mv==3) total_mismatch++;
}
// if(pair_number==4)printf("XK3==%d\n", total_mismatch);
if(total_mismatch<=2 || (global_context->config.maximise_sensitivity_indel && total_mismatch <= 2 ))
for(x1=0; x1<dyna_steps;x1++)
......@@ -1960,24 +1961,9 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
}
else if ( is_in_indel && (mv == 0 || mv == 3) )
{
//if(pair_number == 17296)
//printf("R%d : NEW INDEL: %u; len = %d\n", is_second_read, indel_left_boundary - 1 , current_indel_len);
//#ifdef indel_debug
//#warning "=========== COMMENT THIS LINE ==============="
//printf("NEW INDEL: %u; len = %d\n", indel_left_boundary - 1 , current_indel_len);
//#endif
// let's test if it is ambiguous:
gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541",read_name ) == 0){
SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%ul READ_CUR=%d\n",i, current_indel_len, pair_number, indel_left_boundary-1, cursor_on_read);
}
int ambiguous_count=0;
//#warning " >>>>>>>> MAKE SURE DISABLING THE NEXT BLOCK IS HARMLESS <<<<<<<<< "
if(0){
int ambiguous_i, best_matched_bases = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) +
match_chro(read_text + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type);
......@@ -1993,6 +1979,9 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
int old_event_id = -1;
if(0)if(indel_left_boundary >= 44438630 + 1210 - 200 && indel_left_boundary <= 44438630 + 1210 + 200 && current_indel_len == 6){
SUBREADprintf("ADD AN INDEL: %s : %u ; len = %u\n", read_name, indel_left_boundary, current_indel_len);
}
chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, read_text + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0, &old_event_id);
mark_gapped_read(current_result);
......@@ -2285,7 +2274,7 @@ int write_indel_final_results(global_context_t * global_context)
FILE * ofp = NULL;
fn2 = malloc(MAX_FILE_NAME_LENGTH);
snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.indel", global_context->config.output_prefix);
snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.indel.vcf", global_context->config.output_prefix);
ofp = f_subr_open(fn2, "wb");
......@@ -4418,8 +4407,6 @@ int finalise_long_insertions(global_context_t * global_context)
void init_global_context(global_context_t * context)
{
srand(time(NULL));
memset(context->module_contexts, 0, 5*sizeof(void *));
memset(&context->config, 0, sizeof(configuration_t));
......@@ -4428,6 +4415,7 @@ void init_global_context(global_context_t * context)
context->config.report_sam_file = 1;
context->config.do_breakpoint_detection = 0;
context->config.do_fusion_detection = 0;
context->config.do_long_del_detection = 0;
context->config.do_structural_variance_detection = 0;
context->config.more_accurate_fusions = 1;
context->config.report_multi_mapping_reads = 0;
......@@ -4542,7 +4530,7 @@ void init_global_context(global_context_t * context)
int seed_rand[2];
double double_time = miltime();
memcpy(seed_rand, &double_time, 2*sizeof(int));
srand(seed_rand[0]^seed_rand[1]);
myrand_srand(seed_rand[0]^seed_rand[1]);
context->config.max_indel_length = 5;
context->config.phred_score_format = FASTQ_PHRED33;
......
......@@ -48,6 +48,7 @@ static struct option long_options[] =
{"SAMinput", no_argument, 0, 0},
{"reportPairedMultiBest", no_argument, 0, 0},
{"sv", no_argument, 0, 0},
{"longDel", no_argument, 0, 0},
{"extraColumns", no_argument, 0, 0},
{"forcedPE", no_argument, 0, 0},
{"ignoreUnmapped", no_argument, 0, 0},
......@@ -196,8 +197,9 @@ void print_usage_core_aligner()
SUBREADputs("");
SUBREADputs("# gene annotation");
SUBREADputs("");
SUBREADputs(" -a Name of an annotation file. GTF/GFF format by default. See");
SUBREADputs(" -F option for more format information.");
SUBREADputs(" -a Name of an annotation file (gzipped file is accepted).");
SUBREADputs(" GTF/GFF format by default. See -F option for more format");
SUBREADputs(" information.");
SUBREADputs("");
SUBREADputs(" -F Specify format of the provided annotation file. Acceptable");
SUBREADputs(" formats include 'GTF' (or compatible GFF format) and");
......@@ -484,6 +486,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("BAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 0){
SUBREADprintf("ERROR: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 1;
global_context->config.is_SAM_file_input = 1;
}
......@@ -493,6 +500,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("SAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 1){
SUBREADprintf("ERROR: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 0;
global_context->config.is_SAM_file_input = 1;
}
......@@ -515,6 +527,12 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.prefer_donor_receptor_junctions = 0;
//global_context->config.do_big_margin_filtering_for_reads = 1;
}
else if(strcmp("longDel", long_options[option_index].name)==0)
{
global_context->config.do_breakpoint_detection = 1;
global_context->config.do_long_del_detection = 1;
global_context->config.prefer_donor_receptor_junctions = 0;
}
else if(strcmp("minMappedLength", long_options[option_index].name)==0)
{
if(!is_valid_digit(optarg, "minMappedLength"))
......@@ -553,7 +571,7 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.maximise_sensitivity_indel = 1;
global_context->config.realignment_minimum_variant_distance = 1;
global_context->config.max_indel_length = 16;
// global_context->config.max_indel_length = max(16, global_context->config.max_indel_length);
}
else if(strcmp("minVoteCutoff", long_options[option_index].name)==0)
{
......@@ -577,7 +595,7 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
if(global_context->config.reported_multi_best_reads > 1 && ! global_context->config.report_multi_mapping_reads)
SUBREADprintf("WARNING: You required multi best alignments, but disallowed multi-mapping reads. You need to turn on the multi-mapping option.\n");
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && (global_context->config.do_fusion_detection || global_context->config.do_long_del_detection);
if(global_context->config.more_accurate_fusions)
{
global_context->config.high_quality_base_threshold = 999999;
......
......@@ -199,8 +199,9 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs("# gene annotation");
SUBREADputs("");
SUBREADputs(" -a Name of an annotation file. GTF/GFF format by default. See");
SUBREADputs(" -F option for more format information.");
SUBREADputs(" -a Name of an annotation file (gzipped file is accepted).");
SUBREADputs(" GTF/GFF format by default. See -F option for more format");
SUBREADputs(" information.");
SUBREADputs("");
SUBREADputs(" -F Specify format of the provided annotation file. Acceptable");
SUBREADputs(" formats include 'GTF' (or compatible GFF format) and");
......@@ -498,11 +499,19 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("BAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 0){
SUBREADprintf("Error: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 1;
global_context->config.is_SAM_file_input = 1;
}
else if(strcmp("SAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 1){
SUBREADprintf("Error: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 0;
global_context->config.is_SAM_file_input = 1;
}
......@@ -583,7 +592,7 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.maximise_sensitivity_indel = 1;
global_context->config.realignment_minimum_variant_distance = 1;
global_context->config.max_indel_length = 16;
// global_context->config.max_indel_length = 16;
}
else if(strcmp("disableBigMargin", long_options[option_index].name)==0)
{
......
This diff is collapsed.
......@@ -159,4 +159,5 @@ void finalise_structural_variances(global_context_t * global_context);
void debug_show_event(global_context_t* global_context, chromosome_event_t * event);
void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, int * small_pos, unsigned int * small_abs, char ** large_chro, int * large_pos, unsigned int * large_abs);
int get_offset_maximum_chro_pos(global_context_t * global_context, thread_context_t * thread_context, unsigned int linear);
#endif
This diff is collapsed.
......@@ -215,6 +215,7 @@ typedef struct{
// subfusion
int do_fusion_detection;
int do_long_del_detection;
int do_structural_variance_detection;
int prefer_donor_receptor_junctions;
int more_accurate_fusions;
......
......@@ -18,6 +18,8 @@ typedef unsigned int coverage_bin_entry_t;
int is_BAM_input = 0;
int max_M = 10;
int paired_end = 0;
int ignore_bad_pair = 0;
int base_values = 0;
char input_file_name[300];
char output_file_name[300];
HashTable * cov_bin_table;
......@@ -100,7 +102,7 @@ void add_chro(char *sam_h)
}
void ** bin_entry = malloc(sizeof(void *)*2);
coverage_bin_entry_t * chro_bin = calloc(sizeof(coverage_bin_entry_t) , chro_len);
coverage_bin_entry_t * chro_bin = calloc(sizeof(coverage_bin_entry_t) , chro_len * ( base_values?5:1 ));
if(!chro_bin)
{
SUBREADprintf("ERROR: cannot allocate the memory block. You need at least 4GB of memory,\n");
......@@ -111,7 +113,7 @@ void add_chro(char *sam_h)
SUBREADprintf("Added a new chromosome : %s [%u]\n", chro_name, chro_len);
}
void get_read_info(char * fl, char * chro, unsigned int * pos , char * cigar, int *flags){
void get_read_info(char * fl, char * chro, unsigned int * pos_0base , char * cigar, int *flags, int * tlen, char ** rtext){
char * tmp_tok = NULL;
char * tmp_res = strtok_r(fl, "\t", &tmp_tok);
......@@ -122,12 +124,28 @@ void get_read_info(char * fl, char * chro, unsigned int * pos , char * cigar, in
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // chro
strcpy(chro, tmp_res);
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // pos
(*pos) = atoi(tmp_res);
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // pos_0base
(*pos_0base) = atoi(tmp_res);
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // qual
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // cigar
strcpy(cigar, tmp_res);
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // mate chro
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // mate pos
tmp_res = strtok_r(NULL, "\t", &tmp_tok); // TLEN
(*tlen) = atoi(tmp_res);
if(rtext)
*rtext = strtok_r(NULL, "\t", &tmp_tok);
}
int base_value_to_int(char b){
if(b=='A') return 1;
if(b=='C') return 2;
if(b=='G') return 3;
if(b=='T') return 4;
return 0;
}
int covCalc()
......@@ -138,9 +156,9 @@ int covCalc()
HashTableSetKeyComparisonFunction(cov_bin_table , fc_strcmp_chro);
HashTableSetDeallocationFunctions(cov_bin_table , free, free);
unsigned int hit_locs[2][MAX_FRAGMENT_LENGTH], reads = 0;
unsigned int this_hit_locs[MAX_FRAGMENT_LENGTH], reads = 0;
char this_hit_bases[MAX_FRAGMENT_LENGTH];
coverage_bin_entry_t * chrbin12[2];
int hits1,hits2;
SamBam_FILE * in_fp = SamBam_fopen(input_file_name, is_BAM_input?SAMBAM_FILE_BAM:SAMBAM_FILE_SAM);
char * line_buffer = malloc(3000);
......@@ -151,25 +169,29 @@ int covCalc()
if(line_buffer[0]=='@'){
if(strstr(line_buffer,"@SQ\t"))
add_chro(line_buffer);
}
else
{
} else {
char * Chros[ max_M ];
unsigned int Staring_Points[max_M];
unsigned short Staring_Read_Points[max_M];
unsigned short Section_Lengths[max_M];
int flags=0, x1, is_junc = 0;
int flags=0, x1, is_junc = 0, tlen = 0;
char cigar_str[200];
char chro[200];
unsigned int pos = 0;
char chro[200], *rtext = NULL;
unsigned int pos_0base = 0;
cigar_str[0]=0;
chro[0]=0;
int this_hits = 0;
if(reads % 2 == 0) hits1= hits2 = 0;
get_read_info(line_buffer, chro, &pos, cigar_str, &flags);
get_read_info(line_buffer, chro, &pos_0base, cigar_str, &flags, &tlen, &rtext);
pos_0base --;
if(flags & 4) continue;
if(0 && FIXLENstrcmp("NS500643:166:HNCCHBGXY:1:23105:1714:15599" , line_buffer)==0)
SUBREADprintf("INPUT READ_%d: FLAG=%d, chro=%s, pos=%u\n", 1+(reads % 2 ), flags, chro, pos_0base);
if((flags & 4) || ( paired_end && (tlen == 0 || abs(tlen) > 500000) )){
reads++;
continue;
}
void ** bin_entry = HashTableGet(cov_bin_table, chro);
if(NULL == bin_entry)
......@@ -179,10 +201,8 @@ int covCalc()
coverage_bin_entry_t * chrbin = (coverage_bin_entry_t*) bin_entry[0];
unsigned int chrlen = (void *)( bin_entry[1]) - NULL;
int cigar_sections = RSubread_parse_CIGAR_string(chro, pos, cigar_str, max_M, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
int cigar_sections = RSubread_parse_CIGAR_string(chro, pos_0base, cigar_str, max_M, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
if(paired_end) {
int * this_hits = (reads%2)?&hits2:&hits1;
unsigned int * this_hit_locs = &(hit_locs[reads%2][0]);
for(x1 = 0; x1 < cigar_sections; x1++){