Imported Upstream version 1.5.0-p1+dfsg

parent 32f5127f
This diff is collapsed.
......@@ -237,12 +237,13 @@ int read_contig_fasta(fasta_contigs_t * tab, char * fname){
}
fclose(fp);
return 0;
}
return 1;
}
int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read){
int ret = RSubread_parse_CIGAR_string(MainChro, MainPos, CIGAR_Str, Chros, Staring_Chro_Points, Section_Start_Read_Pos, Section_Length, is_junction_read);
int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, int max_M, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read){
int ret = RSubread_parse_CIGAR_string(MainChro, MainPos, CIGAR_Str, max_M, Chros, Staring_Chro_Points, Section_Start_Read_Pos, Section_Length, is_junction_read);
char read_main_strand = (((FLAG & 0x40)==0x40) == ((FLAG & 0x10) == 0x10 ))?'-':'+';
int tag_cursor=0;
......@@ -254,7 +255,7 @@ int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int Ma
char current_fusion_char[MAX_CHROMOSOME_NAME_LEN];
unsigned int current_fusion_pos = 0;
char current_fusion_strand = 0;
char current_fusion_cigar[FC_CIGAR_PARSER_ITEMS * 15];
char current_fusion_cigar[max_M * 15];
current_fusion_cigar [0] =0;
current_fusion_char [0]=0;
......@@ -280,7 +281,7 @@ int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int Ma
unsigned int left_pos = current_fusion_pos;
if(current_fusion_strand!=read_main_strand)
left_pos = find_left_end_cigar(current_fusion_pos, current_fusion_cigar);
ret += RSubread_parse_CIGAR_string(current_fusion_char, left_pos, current_fusion_cigar, Chros + ret, Staring_Chro_Points+ ret, Section_Start_Read_Pos+ ret, Section_Length + ret, is_junction_read);
ret += RSubread_parse_CIGAR_string(current_fusion_char, left_pos, current_fusion_cigar, max_M - ret, Chros + ret, Staring_Chro_Points+ ret, Section_Start_Read_Pos+ ret, Section_Length + ret, is_junction_read);
current_fusion_pos = 0;
current_fusion_strand = 0;
......@@ -319,7 +320,7 @@ int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int Ma
return ret;
}
int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, int max_M, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
{
unsigned int tmp_int=0;
int cigar_cursor=0;
......@@ -339,13 +340,13 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
{
if(ch == 'S')
read_cursor += tmp_int;
else if(ch == 'M') {
else if(ch == 'M' || ch == 'X' || ch == '=') {
read_cursor += tmp_int;
current_section_chro_len += tmp_int;
chromosome_cursor += tmp_int;
} else if(ch == 'N' || ch == 'D' || ch=='I' || ch == 0) {
if('N' == ch)(*is_junction_read)=1;
if(ret < FC_CIGAR_PARSER_ITEMS)
if(ret < max_M)
{
if(current_section_chro_len>0)
{
......@@ -355,7 +356,7 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
Section_Chro_Length[ret] = current_section_chro_len;
ret ++;
}
}
}else assert(0);
current_section_chro_len = 0;
if(ch == 'I') read_cursor += tmp_int;
else if(ch == 'N' || ch == 'D') chromosome_cursor += tmp_int;
......
......@@ -43,10 +43,10 @@ void destroy_contig_fasta(fasta_contigs_t * tab);
// This function returns the number of sections found in the CIGAR string. It returns -1 if the CIGAR string cannot be parsed.
int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read);
int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, int max_M, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read);
int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read);
int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, int max_M, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read);
// This function try to find the attribute value of a given attribute name from the extra column string in GTF/GFF.
// If the value is found, it returns the length of the value (must be > 0 by definition), or -1 if no attribute is found or the format is wrong.
......
......@@ -8,7 +8,7 @@ LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE -l compat
CC = gcc ${CCFLAGS} -ggdb -fomit-frame-pointer -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge core-bigtable
ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge core-bigtable seek-zlib long-hashtable
ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
......@@ -18,6 +18,15 @@ all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation finished. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
propmapped: propmapped.c ${ALL_OBJECTS}
${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -17,6 +17,15 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount propmapped qualityScores removeDup subread-fullscan ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation finished. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
globalReassembly: global-reassembly.c ${ALL_OBJECTS}
${CC} -o globalReassembly global-reassembly.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -15,6 +15,16 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation finished. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
@echo "###########################################################"
@echo
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -506,7 +506,7 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
flanking_matched = d;
}
// SUBREADprintf("TEST: %s : %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d\n", chro_name, i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail);
//SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail);
float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
if(all_result_needed || ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16))
snp_fisher_raw [i] = p_middle;
......@@ -1008,8 +1008,12 @@ int run_chromosome_search(FILE *in_fp, FILE * out_fp, char * chro_name , char *
if( (*task_no) % all_threads == thread_no && all_offset <= chro_len)
{
process_snp_votes(out_fp, all_offset, offset, referenced_base, chro_name , temp_prefix, parameters);
print_in_box(89,0,0,"processed block %c[36m%s@%d%c[0m by thread %d/%d [block number=%d/%d]", CHAR_ESC, chro_name, all_offset, CHAR_ESC , thread_no+1, all_threads, 1+(*task_no)-parameters->empty_blocks, parameters->all_blocks);
//#warning "=== ONLY TEST ONE BLOCK ==="
//if(strcmp(chro_name,"chr7")==0 && all_offset == 45000000){
if(1){
process_snp_votes(out_fp, all_offset, offset, referenced_base, chro_name , temp_prefix, parameters);
print_in_box(89,0,0,"processed block %c[36m%s@%d%c[0m by thread %d/%d [block number=%d/%d]", CHAR_ESC, chro_name, all_offset, CHAR_ESC , thread_no+1, all_threads, 1+(*task_no)-parameters->empty_blocks, parameters->all_blocks);
}
}
else if((*task_no) % all_threads == thread_no)
{
......
......@@ -145,7 +145,7 @@ int transfer_SAM_to_assembly(global_context_t * global_context)
int read_len, is_repeated, mapping_quality, is_anchor_certain=1;
int pos_delta = 0;
if(feof(global_context -> input_reads.first_read_file.input_fp))break;
if(feof((FILE *)global_context -> input_reads.first_read_file.input_fp))break;
read_text[0]=0;
qual_text[0]=0;
......
typedef {
g
} SeekGZ_fp;
typedef {
unsigned int current_block_offset;
char * current_block_compressed;
} SeekGZ_checkpoint;
......@@ -250,10 +250,10 @@ int write_read_pos(char * chro_name, unsigned int chro_offset_anchor, unsigned s
if(window_no != last_window_no)
{
int close_now = 0;
sprintf(temp_file_name,"%s@%s-%04u.bin", temp_file_prefix, chro_name , window_point / BASE_BLOCK_LENGTH_INDEX );
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table);
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table, &close_now);
struct read_position_info record;
record.chro_offset = htonl(chro_offset_anchor);
......@@ -263,6 +263,7 @@ int write_read_pos(char * chro_name, unsigned int chro_offset_anchor, unsigned s
strcpy(record.cigar, cigar_str);
fwrite(&record, sizeof(record), 1, pileup_fp);
if(close_now) fclose(pileup_fp);
last_window_no = window_no;
}
......
......@@ -700,7 +700,18 @@ int init_indel_tables(global_context_t * context)
if(context->config.is_third_iteration_running)
{
char * fns = malloc(200);
fns[0]=0;
exec_cmd("ulimit -n", fns, 200);
int max_open_file = atoi(fns);
//SUBREADprintf("SYS FILE LIMIT=%d\n", max_open_file);
free(fns);
max_open_file = max(100, max_open_file);
max_open_file = min(3000, max_open_file);
indel_context -> local_reassembly_pileup_files = HashTableCreate(100);
indel_context -> local_reassembly_pileup_files -> appendix1 = NULL + max_open_file * 2/ 3;
HashTableSetDeallocationFunctions(indel_context -> local_reassembly_pileup_files, NULL, NULL);
HashTableSetKeyComparisonFunction(indel_context -> local_reassembly_pileup_files, my_strcmp);
HashTableSetHashFunction(indel_context -> local_reassembly_pileup_files ,HashTableStringHashFunction);
......@@ -1790,7 +1801,7 @@ int write_indel_final_results(global_context_t * global_context)
for(xk1 = 0; xk1 < indel_context -> total_events ; xk1++)
{
char * chro_name;
unsigned int chro_pos;
int chro_pos;
chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
......@@ -1946,7 +1957,7 @@ int write_local_reassembly(global_context_t *global_context, HashTable *pileup_f
{
char * chro_name;
unsigned int chro_offset;
int chro_offset;
int delta_pos = 0;
int new_read_len = trim_read(global_context, NULL, read_text, qual_text, read_len, &delta_pos);
......@@ -1973,12 +1984,14 @@ int write_local_reassembly(global_context_t *global_context, HashTable *pileup_f
if(0 == locate_gene_position_max(anchor_pos, &global_context -> chromosome_table, &chro_name, &chro_offset, read_len))
{
char temp_file_name[MAX_FILE_NAME_LENGTH];
int close_now = 0;
sprintf(temp_file_name,"%s@%s-%04u.bin", global_context -> config.temp_file_prefix, chro_name , chro_offset / BASE_BLOCK_LENGTH );
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table);
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table, &close_now);
//assert(read_len == strlen(read_text) && read_len > 90);
write_read_block_file(pileup_fp , 0, read_name, 0, chro_name , chro_offset, NULL, 0, read_text , qual_text, read_len , 1 , is_anchor_certain , anchor_pos, read_len);
if(close_now) fclose(pileup_fp);
}
......@@ -3639,8 +3652,8 @@ int finalise_pileup_file_by_voting(global_context_t * global_context , char * te
{
int write_cursor;
char * chro_begin;
unsigned int chro_offset_start = 0;
unsigned int chro_offset_end = 0;
int chro_offset_start = 0;
int chro_offset_end = 0;
locate_gene_position_max(contig_start_pos + head_removed_bases ,& global_context -> chromosome_table, &chro_begin, &chro_offset_start, 0);
locate_gene_position_max(contig_end_pos - tail_removed_bases ,& global_context -> chromosome_table, &chro_begin, &chro_offset_end, 0);
if(full_rebuilt_window_size - read_position_cursor - tail_removed_bases)
......@@ -3811,7 +3824,8 @@ void destroy_pileup_table(HashTable* local_reassembly_pileup_files)
{
if (!cursor) break;
FILE * fp = (FILE *)cursor->value;
fclose(fp);
if(fp != NULL+1)
fclose(fp);
free((void *)cursor->key);
cursor = cursor->next;
}
......@@ -3897,6 +3911,7 @@ void init_global_context(global_context_t * context)
srand(time(NULL));
memset(context->module_contexts, 0, 5*sizeof(void *));
memset(&context->config, 0, sizeof(configuration_t));
context->config.fast_run = 0;
context->config.memory_use_multiplex = 1;
......
......@@ -19,8 +19,8 @@ static struct option long_options[] =
{"threads", required_argument, 0, 'T'},
{"indel", required_argument, 0, 'I'},
{"phred", required_argument, 0, 'P'},
{"minmatch", required_argument, 0, 'm'},
{"minmatch2", required_argument, 0, 'p'},
{"minvotes", required_argument, 0, 'm'},
{"minvotes2", required_argument, 0, 'p'},
{"mindist", required_argument, 0, 'd'},
{"maxdist", required_argument, 0, 'D'},
{"order", required_argument, 0, 'S'},
......@@ -92,6 +92,10 @@ void print_usage_core_aligner()
SUBREADputs(" subreads that map in consensus) . If paired-end, this gives");
SUBREADputs(" the consensus threshold for the anchor read. 3 by default");
SUBREADputs("");
SUBREADputs(" -M <int> Specify the maximum number of mis-matched bases allowed in");
SUBREADputs(" the alignment. 3 by default. Mis-matches found in soft-");
SUBREADputs(" clipped bases are not counted.");
SUBREADputs("");
SUBREADputs(" -T <int> Number of CPU threads used, 1 by default.");
SUBREADputs("");
SUBREADputs(" -I <int> Maximum length (in bp) of indels that can be detected. 5 by");
......@@ -112,10 +116,6 @@ void print_usage_core_aligner()
SUBREADputs(" the mapping output. Note that read mapping is performed at");
SUBREADputs(" color-space.");
SUBREADputs("");
SUBREADputs(" -M <int> Specify the maximum number of mis-matched bases allowed in");
SUBREADputs(" the alignment. 3 by default. Mis-matches found in soft-");
SUBREADputs(" clipped bases are not counted.");
SUBREADputs("");
SUBREADputs(" --sv Detect structural variants (eg. long indel, inversion,");
SUBREADputs(" duplication and translocation) and report breakpoints. Refer");
SUBREADputs(" to Users Guide for breakpoint reporting.");
......@@ -219,27 +219,41 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
core_version_number("Subread-align");
return -1;
case 'G':
if(!is_valid_digit(optarg, "G"))
STANDALONE_exit(-1);
global_context->config.DP_penalty_create_gap = atoi(optarg);
break;
case 'Y':
if(!is_valid_digit(optarg, "Y"))
STANDALONE_exit(-1);
global_context->config.DP_match_score = atoi(optarg);
break;
case 'E':
if(!is_valid_digit(optarg, "E"))
STANDALONE_exit(-1);
global_context->config.DP_penalty_extend_gap = atoi(optarg);
break;
case 'X':
if(!is_valid_digit(optarg, "X"))
STANDALONE_exit(-1);
global_context->config.DP_mismatch_penalty = atoi(optarg);
break;
case '3':
if(!is_valid_digit(optarg, "3"))
STANDALONE_exit(-1);
global_context->config.read_trim_3 = atoi(optarg);
break;
case '5':
if(!is_valid_digit(optarg, "5"))
STANDALONE_exit(-1);
global_context->config.read_trim_5 = atoi(optarg);
break;
case 'J':
global_context->config.show_soft_cliping = 0;
break;
case 'B':
if(!is_valid_digit(optarg, "B"))
STANDALONE_exit(-1);
global_context->config.multi_best_reads = atoi(optarg);
if(global_context->config.multi_best_reads<1)
......@@ -274,12 +288,19 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.convert_color_to_base = 1;
break;
case 'D':
if(!is_valid_digit(optarg, "D"))
STANDALONE_exit(-1);
global_context->config.maximum_pair_distance = atoi(optarg);
break;
case 'd':
if(!is_valid_digit(optarg, "d"))
STANDALONE_exit(-1);
global_context->config.minimum_pair_distance = atoi(optarg);
break;
case 'n':
if(!is_valid_digit(optarg, "n"))
STANDALONE_exit(-1);
global_context->config.total_subreads = atoi(optarg);
//global_context->config.total_subreads = min(31,global_context->config.total_subreads );
break;
......@@ -287,7 +308,10 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.minimum_subread_for_first_read = atof(optarg);
break;
case 'T':
if(!is_valid_digit_range(optarg, "T", 1, 64))
STANDALONE_exit(-1);
global_context->config.all_threads = atoi(optarg);
if(global_context->config.all_threads <1) global_context->config.all_threads = 1;
if(global_context->config.all_threads > MAX_THREADS) global_context->config.all_threads = MAX_THREADS;
......@@ -306,6 +330,8 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
strncpy(global_context->config.output_prefix, optarg, MAX_FILE_NAME_LENGTH-1);
break;
case 'I':
if(!is_valid_digit_range(optarg, "I", 0, 200))
STANDALONE_exit(-1);
global_context->config.max_indel_length = atoi(optarg);
if(!is_64_bit_computer) global_context->config.max_indel_length = min(global_context->config.max_indel_length , 16);
......@@ -339,6 +365,8 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
break;
case 'M':
if(!is_valid_digit(optarg, "M"))
STANDALONE_exit(-1);
global_context->config.max_mismatch_exonic_reads = atoi(optarg);
break;
case 'P':
......@@ -348,6 +376,8 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.phred_score_format = FASTQ_PHRED64;
break;
case 'p':
if(!is_valid_digit(optarg, "p"))
STANDALONE_exit(-1);
global_context->config.minimum_subread_for_second_read = atoi(optarg);
break;
case 't':
......@@ -425,6 +455,9 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("minMappedLength", long_options[option_index].name)==0)
{
if(!is_valid_digit(optarg, "minMappedLength"))
STANDALONE_exit(-1);
global_context->config.min_mapped_fraction = atoi(optarg);
}
else if(strcmp("accurateFusions", long_options[option_index].name)==0)
......
......@@ -16,8 +16,8 @@ static struct option long_options[] =
{"read2", required_argument, 0, 'R'},
{"output", required_argument, 0, 'o'},
{"subreads", required_argument, 0, 'n'},
{"minmatch", required_argument, 0, 'm'},
{"minmatch2", required_argument, 0, 'p'},
{"minvotes", required_argument, 0, 'm'},
{"minvotes2", required_argument, 0, 'p'},
{"singleSAM", required_argument, 0, '1'},
{"pairedSAM", required_argument, 0, '2'},
{"threads", required_argument, 0, 'T'},
......@@ -93,6 +93,10 @@ void print_usage_core_subjunc()
SUBREADputs(" subreads that map in consensus) . If paired-end, this gives");
SUBREADputs(" the consensus threshold for the anchor read. 1 by default");
SUBREADputs("");
SUBREADputs(" -M <int> Specify the maximum number of mis-matched bases allowed in");
SUBREADputs(" the alignment. 3 by default. Mis-matches found in soft-");
SUBREADputs(" clipped bases are not counted.");
SUBREADputs("");
SUBREADputs(" -T <int> Number of CPU threads used, 1 by default.");
SUBREADputs("");
SUBREADputs(" -I <int> Maximum length (in bp) of indels that can be detected. 5 by");
......@@ -112,10 +116,6 @@ void print_usage_core_subjunc()
SUBREADputs(" the mapping output. Note that read mapping is performed at");
SUBREADputs(" color-space.");
SUBREADputs("");
SUBREADputs(" -M <int> Specify the maximum number of mis-matched bases allowed in");
SUBREADputs(" the alignment. 3 by default. Mis-matches found in soft-");
SUBREADputs(" clipped bases are not counted.");
SUBREADputs("");
SUBREADputs(" --SAMinput Input reads are in SAM format.");
SUBREADputs("");
SUBREADputs(" --BAMinput Input reads are in BAM format.");
......@@ -221,21 +221,39 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
core_version_number("Subjunc");
return -1;
case 'G':
if(!is_valid_digit(optarg, "G"))
STANDALONE_exit(-1);
global_context->config.DP_penalty_create_gap = atoi(optarg);
break;
case 'Y':
if(!is_valid_digit(optarg, "Y"))
STANDALONE_exit(-1);
global_context->config.DP_match_score = atoi(optarg);
break;
case 'E':
if(!is_valid_digit(optarg, "E"))
STANDALONE_exit(-1);
global_context->config.DP_penalty_extend_gap = atoi(optarg);
break;
case 'X':
if(!is_valid_digit(optarg, "X"))
STANDALONE_exit(-1);
global_context->config.DP_mismatch_penalty = atoi(optarg);
break;
case '3':
if(!is_valid_digit(optarg, "3"))
STANDALONE_exit(-1);
global_context->config.read_trim_3 = atoi(optarg);
break;
case '5':
if(!is_valid_digit(optarg, "5"))
STANDALONE_exit(-1);
global_context->config.read_trim_5 = atoi(optarg);
break;
......@@ -274,27 +292,45 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.convert_color_to_base = 1;
break;
case 'D':
if(!is_valid_digit(optarg, "D"))
STANDALONE_exit(-1);
global_context->config.maximum_pair_distance = atoi(optarg);
break;
case 'd':
if(!is_valid_digit(optarg, "d"))
STANDALONE_exit(-1);
global_context->config.minimum_pair_distance = atoi(optarg);
if(global_context->config.minimum_pair_distance <0)
global_context->config.restrected_read_order = 0;
break;
case 'n':
if(!is_valid_digit(optarg, "n"))
STANDALONE_exit(-1);
global_context->config.total_subreads = atoi(optarg);
//global_context->config.total_subreads = min(31,global_context->config.total_subreads);
break;
case 'm':
if(!is_valid_digit(optarg, "m"))
STANDALONE_exit(-1);
global_context->config.minimum_subread_for_first_read = atoi(optarg);
break;
case 'T':
if(!is_valid_digit_range(optarg, "T", 1, 64))
STANDALONE_exit(-1);
global_context->config.all_threads = atoi(optarg);
if(global_context->config.all_threads <1) global_context->config.all_threads = 1;
if(global_context->config.all_threads > MAX_THREADS) global_context->config.all_threads = MAX_THREADS;
break;
case 'M':
if(!is_valid_digit(optarg, "M"))
STANDALONE_exit(-1);
global_context->config.max_mismatch_exonic_reads = atoi(optarg);
global_context->config.max_mismatch_junction_reads = atoi(optarg);
break;
......@@ -309,6 +345,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
strncpy(global_context->config.output_prefix, optarg, MAX_FILE_NAME_LENGTH-1);
break;
case 'I':
if(!is_valid_digit(optarg, "I"))
STANDALONE_exit(-1);
global_context->config.max_indel_length = atoi(optarg);
if(!is_64_bit_computer) global_context->config.max_indel_length = min(global_context->config.max_indel_length , 16);
......@@ -342,6 +381,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.phred_score_format = FASTQ_PHRED64;
break;
case 'p':
if(!is_valid_digit(optarg, "p"))
STANDALONE_exit(-1);
global_context->config.minimum_subread_for_second_read = atoi(optarg);
break;
case 'F':
......@@ -349,6 +391,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.report_sam_file = 0;
break;
case 'B':
if(!is_valid_digit(optarg, "B"))
STANDALONE_exit(-1);
global_context->config.multi_best_reads = atoi(optarg);
if(global_context->config.multi_best_reads<1)
......@@ -398,6 +443,8 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("junctionIns", long_options[option_index].name)==0)
{
if(!is_valid_digit(optarg, "junctionIns"))
STANDALONE_exit(-1);
global_context->config.check_donor_at_junctions=0;
global_context->config.limited_tree_scan = 0;
global_context->config.max_insertion_at_junctions = atoi(optarg);
......@@ -408,6 +455,8 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("minMappedFraction", long_options[option_index].name)==0)
{
if(!is_valid_digit(optarg, "minMappedFraction"))
STANDALONE_exit(-1);
global_context->config.min_mapped_fraction = atoi(optarg);
}
else if(strcmp("relaxMismatchedBases", long_options[option_index].name)==0)
......
This diff is collapsed.
......@@ -153,5 +153,5 @@ int is_funky_fragment(global_context_t * global_context, char * rname1, char * c
void finalise_structural_variances(global_context_t * global_context);
void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, unsigned int * small_pos, unsigned int * small_abs, char ** large_chro, unsigned int * large_pos, unsigned int * large_abs);
void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, int * small_pos, unsigned int * small_abs, char ** large_chro, int * large_pos, unsigned int * large_abs);
#endif
This diff is collapsed.
......@@ -451,6 +451,7 @@ typedef struct{
int output_buffer_item;
int output_buffer_pointer;
int is_finished;
unsigned int all_mapped_reads;
subread_lock_t output_lock;
} thread_context_t;
......@@ -632,4 +633,8 @@ void absoffset_to_posstr(global_context_t * global_context, unsigned int pos, ch
void test_PE_and_same_chro(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_PE_distance, int * is_same_chromosome , int rlen1, int rlen2);
int FIXLENstrcmp(char * fixed_len, char * rname);
int is_valid_digit(char * optarg, char * optname);
int is_valid_digit_range(char * optarg, char * optname, int min, int max_inc);
int exec_cmd(char * cmd, char * outstr, int out_limit);
#endif
......@@ -14,6 +14,7 @@
unsigned long long all_counted;
typedef unsigned int coverage_bin_entry_t;
int is_BAM_input = 0;
int max_M = 10;
char input_file_name[300];
char output_file_name[300];
HashTable * cov_bin_table;
......@@ -21,6 +22,7 @@ HashTable * cov_bin_table;
static struct option cov_calc_long_options[] =
{
{"maxMOp",required_argument, 0, 'M'},
{"primary",no_argument, 0, 0},
{0, 0, 0, 0}
};
......@@ -35,7 +37,7 @@ void calcCount_usage()
SUBREADputs("");
SUBREADputs("Usage");
SUBREADputs("");
SUBREADputs(" ./coverageCount -i <input_file> -o <output_prefix>");
SUBREADputs(" ./coverageCount [options] -i <input_file> -o <output_prefix>");
SUBREADputs("");
SUBREADputs("Required arguments:");
SUBREADputs("");
......@@ -44,6 +46,12 @@ void calcCount_usage()
SUBREADputs(" -o <string> Prefix of the output files. Each output file contains Four-byte");
SUBREADputs(" integer numbers");
SUBREADputs("");
SUBREADputs("Optional arguments:");
SUBREADputs("");
SUBREADputs(" --maxMOp <int> Maximum number of 'M' operations allowed in a CIGAR string.");
SUBREADputs(" 10 by default. Both 'X' and '=' are treated as 'M' and adjacent");
SUBREADputs(" 'M' operations are merged in the CIGAR string.");
SUBREADputs("");
}
void add_chro(char *sam_h)
......@@ -162,22 +170,22 @@ int covCalc()
}
coverage_bin_entry_t * chrbin = (coverage_bin_entry_t*) bin_entry[0];
unsigned int chrlen = (void *)( bin_entry[0]) - NULL;
int cigar_sections = RSubread_parse_CIGAR_string(chro, pos, cigar_str, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
unsigned int chrlen = (void *)( bin_entry[1]) - NULL;
int cigar_sections = RSubread_parse_CIGAR_string(chro, pos, cigar_str, max_M, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
for(x1 = 0; x1 < cigar_sections; x1++)
{
unsigned int x2;
//printf("%s: %u-%u\n", chro, Staring_Points[x1], Staring_Points[x1]+Section_Lengths[x1]);
for(x2 = Staring_Points[x1]; x2<Staring_Points[x1]+Section_Lengths[x1]; x2++)
{
if(pos+x2 < chrlen)
{
if(x2 < chrlen) {
if(chrbin[x2] <= COVERAGE_MAX_INT)chrbin[x2] ++;
all_counted ++;
if(all_counted % 10000000 == 0)
{
SUBREADprintf("Processed %llu bases.\n", all_counted);
}
} else {
SUBREADprintf("%s:%s %u [%s] :: %u-%u\n", line_buffer , chro, pos, cigar_str, Staring_Points[x1], Staring_Points[x1]+Section_Lengths[x1]);
SUBREADprintf("Read %s overhangs the boundary of chromosome %s (%u >= %u)\n", line_buffer, chro, x2, chrlen);
exit(-1);
}