Skip to content
Commits on Source (7)
subread (1.6.3+dfsg-1) UNRELEASED; urgency=medium
* New upstream version 1.6.3+dfsg
* DH_VERBOSE=1, check for nocheck in DEB_BUILD_OPTIONS
* Bump Policy to 4.2.1
* Refresh patches
* New binary detectionCall segfaults,
remove dropped by upstream coverageCount
* TODO: waiting for answer from upstream regarding segfault on derectionCall
-- Alexandre Mestiashvili <mestia@debian.org> Thu, 11 Oct 2018 12:33:55 +0000
subread (1.6.2+dfsg-1) unstable; urgency=medium
* New upstream version 1.6.2+dfsg
......
......@@ -9,7 +9,7 @@ Build-Depends: bc,
help2man,
python,
zlib1g-dev
Standards-Version: 4.1.4
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/subread
Vcs-Git: https://salsa.debian.org/med-team/subread.git
Homepage: http://sourceforge.net/projects/subread/
......
......@@ -14,10 +14,10 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
-include ~/.R/DBPZ_debug_makefile
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
-LDFLAGS = ${STATIC_MAKE} -pthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
-LDFLAGS = ${STATIC_MAKE} -pthread -lz ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -lm
-CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
+CCFLAGS += -O${OPT_LEVEL} -fsigned-char -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
+LDFLAGS += ${STATIC_MAKE} -pthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
+LDFLAGS += ${STATIC_MAKE} -pthread -lz -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -lm
+CC = ${CC_EXEC} ${CCFLAGS} ${CFLAGS} ${CPPFLAGS} -fmessage-length=0 -ggdb
......@@ -38,4 +38,3 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
ALL_LIBS=LRMsorted-hashtable LRMbase-index LRMchro-event LRMhelper LRMseek-zlib LRMfile-io LRMhashtable
......@@ -12,7 +12,7 @@ From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
#ifdef MACOS
int mib[6], x1, ret = 1;
size_t len;
@@ -881,6 +884,7 @@
@@ -883,6 +886,7 @@
return 1;
#endif
#endif
......
......@@ -2,7 +2,7 @@ Description: Fix typos
Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/sambam-file.c
+++ subread/src/sambam-file.c
@@ -187,7 +187,7 @@
@@ -204,7 +204,7 @@
return "MIDNSHP=X"[ch];
else
{
......@@ -13,7 +13,7 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
}
--- subread.orig/src/readSummary.c
+++ subread/src/readSummary.c
@@ -3691,7 +3691,7 @@
@@ -3748,7 +3748,7 @@
assigned_no++;
}
}
......@@ -22,7 +22,7 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
final_feture_names[GENE_NAME_LIST_BUFFER_SIZE-1]=0;
if(RG_name){
@@ -6439,7 +6439,7 @@
@@ -6503,7 +6503,7 @@
else if(optarg[0]=='5')
reduce_5_3_ends_to_one = REDUCE_TO_5_PRIME_END;
else{
......
#!/usr/bin/make -f
export DH_VERBOSE=1
pkg := subread
mandir := $(CURDIR)/debian/$(pkg)/usr/share/man/man1
bindir := $(CURDIR)/bin
......@@ -31,7 +33,9 @@ override_dh_auto_build:
dh_auto_build -- CC=$(CC)
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
debian/tests/subread-tests $(CURDIR)
endif
HELP2MAN = help2man --no-info --help-option="''" --no-discard-stderr
......@@ -65,8 +69,11 @@ DNA-seq reads and RNA-seq reads (for the purpose of expression analysis)' \
$(HELP2MAN) --name='scans the entire genome and reports all matches of a specified sequence' \
$(utildir)/subread-fullscan | debian/filter.pl > $(mandir)/subread-fullscan.1;
$(HELP2MAN) --name='counting the coverage of mapped reads at each location on the entire reference genome' \
$(utildir)/coverageCount | debian/filter.pl > $(mandir)/coverageCount.1;
# $(HELP2MAN) --name='counting the coverage of mapped reads at each location on the entire reference genome' \
# $(utildir)/coverageCount | debian/filter.pl > $(mandir)/coverageCount.1;
$(HELP2MAN) --name='detectionCall' \
$(utildir)/detectionCall | debian/filter.pl > $(mandir)/detectionCall.1;
$(HELP2MAN) --name='calculate the proportion of mapped reads/fragments' \
$(utildir)/propmapped | debian/filter.pl > $(mandir)/propmapped.1;
......
......@@ -5,7 +5,7 @@ bin/subjunc usr/bin
bin/sublong usr/bin
bin/subread-align usr/bin
bin/subread-buildindex usr/bin
bin/utilities/coverageCount usr/lib/subread
bin/utilities/detectionCall usr/lib/subread
bin/utilities/propmapped usr/lib/subread
bin/utilities/qualityScores usr/lib/subread
bin/utilities/removeDup usr/lib/subread
......
This diff is collapsed.
......@@ -839,6 +839,7 @@ int mac_str(char * str_buff)
}
return ret;
#else
#if defined(IFHWADDRLEN)
struct ifreq ifr;
struct ifconf ifc;
char buf[1024];
......@@ -878,6 +879,7 @@ int mac_str(char * str_buff)
}
return 0;
}
#endif
return 1;
#endif
#endif
......@@ -1118,7 +1120,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
if(!is_GFF_geneid_warned){
int ext_att_len = strlen(extra_attrs);
if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
SUBREADprintf("\nWarning: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s' \nThe attributes included in your GTF annotation are '%s' \n\n", gene_id_column, extra_attrs);
SUBREADprintf("\nERROR: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\nThe program has to terminate.\n\n", gene_id_column, extra_attrs);
}
is_GFF_geneid_warned++;
}
......@@ -1127,7 +1129,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
if(!is_GFF_txid_warned){
int ext_att_len = strlen(extra_attrs);
if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
SUBREADprintf("\nWarning: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s' \nThe attributes included in your GTF annotation are '%s' \n\n", transcript_id_column, extra_attrs);
SUBREADprintf("\nERROR: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified transcript identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\nThe program has to terminate\n\n", transcript_id_column, extra_attrs);
}
is_GFF_txid_warned++;
}
......@@ -1142,6 +1144,12 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
}
autozip_close(&afp);
free(file_line);
if(is_GFF_txid_warned || is_GFF_geneid_warned)return -2;
if(loaded_features<1){
SUBREADprintf("\nERROR: No feature was loaded from the annotation file. Please check if the annotation format was correctly specified, and also if the feature type was correctly specified if the annotation is in the GTF format.\n\n");
return -2;
}
return loaded_features;
}
......
......@@ -14,11 +14,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
mv repair subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -70,9 +70,6 @@ subread-fullscan: fullscan.c ${ALL_OBJECTS}
subtools: subtools.c ${ALL_OBJECTS}
${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
clean:
rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS}
......@@ -8,7 +8,7 @@ include makefile.version
-include ~/.R/DBPZ_debug_makefile
CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
LDFLAGS = ${STATIC_MAKE} -pthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
LDFLAGS = ${STATIC_MAKE} -pthread -lz ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -lm
CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
......@@ -17,11 +17,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount flattenGTF # samMappedBases mergeVCF testZlib
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
mv detectionCall repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -85,8 +85,5 @@ qualityScores: qualityScores.c ${ALL_OBJECTS}
subread-fullscan: fullscan.c ${ALL_OBJECTS}
${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
clean:
rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS}
......@@ -11,11 +11,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount flattenGTF # globalReassembly testZlib
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
mv repair subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -70,9 +70,6 @@ qualityScores: qualityScores.c ${ALL_OBJECTS}
subread-fullscan: fullscan.c ${ALL_OBJECTS}
${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
testZlib: test-seek-zlib.c ${ALL_OBJECTS}
${CC} -o testZlib test-seek-zlib.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -476,8 +476,7 @@ void fishers_test_on_POI(struct SNP_Calling_Parameters * parameters, float * snp
}
}
}
void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * snp_fisher_raw, unsigned int * snp_voting_piles, char * referenced_genome, unsigned int reference_len, double multiplex_base, char * SNP_bitmap_recorder, unsigned short * snp_filter_background_matched, unsigned short * snp_filter_background_unmatched, int all_result_needed)
{
void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * snp_fisher_raw, unsigned int * snp_voting_piles, char * referenced_genome, unsigned int reference_len, double multiplex_base, char * SNP_bitmap_recorder, unsigned short * snp_filter_background_matched, unsigned short * snp_filter_background_unmatched, int all_result_needed, char * chro_name, unsigned int block_start) {
int i, j, is_fresh_jumppd, remove_old;
int a=0, b=0, c=0, d=0, go_ahead = 0, left_tail = 0;
......@@ -547,6 +546,7 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
float observed_coverage = (b+d) *1./(1. + 2 * parameters -> fisher_exact_testlen) ;
double p_cutoff = pow(10, -(observed_coverage/multiplex_base));
p_cutoff = min(parameters -> cutoff_upper_bound, p_cutoff);
p_cutoff = max(1E-320, p_cutoff);
int flanking_unmatched = b-a;
int flanking_matched = d-c;
......@@ -557,8 +557,11 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
}
float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
if(0 && flanking_matched > 10000 && p_middle < 1E-5)
SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
unsigned int chro_pos = 1 + block_start + i;
if(0 && chro_pos < 11109861+10 && chro_pos > 11109861-10)
SUBREADprintf("TEST: %s : %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", chro_name, chro_pos ,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
if(all_result_needed || ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16))
snp_fisher_raw [i] = p_middle;
else snp_fisher_raw [i] = -999;
......@@ -663,7 +666,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
pcutoff_list[i]=-1.;
}
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome, chro_name, offset);
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome, chro_name, offset - reference_len);
fclose(tmp_fp);
if (parameters -> delete_piles)
......@@ -832,10 +835,10 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
if(parameters -> fisher_exact_testlen)
{
fishers_test_on_block(parameters, snp_fisher_raw, snp_voting_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, snp_filter_background_matched, snp_filter_background_unmatched, 0);
fishers_test_on_block(parameters, snp_fisher_raw, snp_voting_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, snp_filter_background_matched, snp_filter_background_unmatched, 0, chro_name, BASE_BLOCK_LENGTH*block_no +1);
if(parameters->background_input_file[0])
{
fishers_test_on_block(parameters, snp_fisher_BGC, snp_BGC_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, NULL, NULL, 1);
fishers_test_on_block(parameters, snp_fisher_BGC, snp_BGC_piles, referenced_genome, reference_len, multiplex_base, SNP_bitmap_recorder, NULL, NULL, 1, chro_name, BASE_BLOCK_LENGTH*block_no +1);
fishers_test_on_POI(parameters, snp_fisher_VS, snp_voting_piles, snp_BGC_piles, referenced_genome, reference_len, snp_fisher_raw);
}
}
......@@ -1476,7 +1479,7 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
}
if(parameters -> background_input_file[0])
{
char temp_file_prefix2[300];
char temp_file_prefix2[350];
sprintf(temp_file_prefix2, "%sBGC-", temp_file_prefix);
if(break_SAM_file(parameters -> background_input_file, parameters -> is_BAM_file_input, temp_file_prefix2, NULL, NULL, known_chromosomes, 1, parameters -> bases_ignored_head_tail, parameters->subread_index_array, parameters->subread_index_offsets, NULL, NULL, NULL)) return -1;
}
......
......@@ -36,9 +36,7 @@ unsigned int get_handling_thread_number(global_context_t * global_context , subr
*/return 0;
}
unsigned long long get_inner_pair(global_context_t * global_context , subread_read_number_t pair_number){
return pair_number;
}
#define get_inner_pair(a,b) (b)
bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int is_second_read, int load_more);
......@@ -94,48 +92,33 @@ int init_bigtable_results(global_context_t * global_context, int is_rewinding)
//SUBREADprintf("reads_per_chunk = %u ; cached_single_reads = %u ; size of each read = %d + %d\n", global_context -> config.reads_per_chunk, global_context -> bigtable_cache_size, sizeof(mapping_result_t) , sizeof(subjunc_result_t));
if(!is_rewinding){
//
if(is_rewinding){
memset( global_context -> bigtable_cache_malloc_ptr, 0, sizeof(mapping_result_t) * global_context -> config.multi_best_reads * global_context -> bigtable_cache_size );
if(global_context -> config.do_breakpoint_detection)
memset(global_context -> bigtable_cache_malloc_ptr_junc , 0, sizeof(subjunc_result_t) * global_context -> config.multi_best_reads * global_context -> bigtable_cache_size);
}else{
global_context -> bigtable_cache = malloc(sizeof(bigtable_cached_result_t) * global_context -> bigtable_cache_size);
global_context -> bigtable_cache_malloc_ptr = calloc( sizeof(mapping_result_t) , global_context -> config.multi_best_reads * global_context -> bigtable_cache_size );
if(global_context -> config.do_breakpoint_detection)
global_context -> bigtable_cache_malloc_ptr_junc = calloc(sizeof(subjunc_result_t) , global_context -> config.multi_best_reads * global_context -> bigtable_cache_size);
assert(NULL != global_context -> bigtable_cache_malloc_ptr );
}
int xk1;
for(xk1 = 0; xk1 < global_context -> bigtable_cache_size; xk1++){
if(!is_rewinding){
global_context -> bigtable_cache [xk1].alignment_res = malloc(sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
assert(global_context -> bigtable_cache [xk1].alignment_res!=NULL);
}
if(global_context -> config.use_memory_buffer)
{
memset(global_context -> bigtable_cache [xk1].big_margin_data, 0, sizeof(global_context -> bigtable_cache [xk1].big_margin_data));
memset(global_context -> bigtable_cache [xk1].alignment_res, 0, sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
}
if(!is_rewinding) for(xk1 = 0; xk1 < global_context -> bigtable_cache_size; xk1++){
global_context -> bigtable_cache [xk1].alignment_res = (mapping_result_t*) global_context -> bigtable_cache_malloc_ptr + global_context -> config.multi_best_reads * xk1;
if(global_context -> config.do_breakpoint_detection){
if(!is_rewinding)
global_context -> bigtable_cache [xk1].subjunc_res = malloc(sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
if(global_context -> config.use_memory_buffer)
memset(global_context -> bigtable_cache [xk1].subjunc_res , 0, sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
}
if(global_context -> config.do_breakpoint_detection)
global_context -> bigtable_cache [xk1].subjunc_res = (subjunc_result_t*)global_context -> bigtable_cache_malloc_ptr_junc + global_context -> config.multi_best_reads * xk1;
}
if(global_context->config.do_big_margin_filtering_for_junctions)for(xk1 = 0; xk1 < global_context -> bigtable_cache_size; xk1++)
memset(global_context -> bigtable_cache [xk1].big_margin_data, 0, sizeof(global_context -> bigtable_cache [xk1].big_margin_data));
subread_init_lock(&global_context -> bigtable_lock);
if(global_context -> config.use_memory_buffer)
assert(global_context -> config.use_memory_buffer);
global_context -> bigtable_cache_file_fp = NULL;
else {
char tmpfname[MAX_FILE_NAME_LENGTH+33];
sprintf(tmpfname, "%s-%02d-align.bin", global_context -> config.temp_file_prefix, 0);
//if(is_rewinding) unlink(tmpfname);
FILE * fp = fopen(tmpfname, "w+");
global_context -> bigtable_cache_file_fp = fp;
global_context -> bigtable_cache_file_fragments = -1;
global_context -> bigtable_cache_file_loaded_fragments_begin = -1;
global_context -> bigtable_dirty_data = 0;
}
return 0;
}
......@@ -156,25 +139,8 @@ subjunc_result_t * _global_retrieve_subjunc_ptr(global_context_t * global_contex
void bigtable_write_thread_cache(global_context_t * global_context){
if(global_context -> bigtable_cache_file_fp == NULL) return;
if(global_context -> bigtable_dirty_data && global_context -> bigtable_cache_file_loaded_fragments_begin>=0 )
{
long long start_file_location = calc_file_location( global_context -> bigtable_cache_file_loaded_fragments_begin);
int xk1, xk2;
fseeko(global_context -> bigtable_cache_file_fp, start_file_location, SEEK_SET);
for(xk1 = 0; xk1 < global_context -> bigtable_chunked_fragments; xk1++) {
for(xk2 = 0; xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++){
bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1 * (1+global_context -> input_reads.is_paired_end_reads) + xk2;
fwrite( current_cache -> big_margin_data , sizeof(short) * 3 * global_context -> config.big_margin_record_size , 1, global_context -> bigtable_cache_file_fp);
fwrite( current_cache -> alignment_res , sizeof(mapping_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp);
if(global_context -> config.do_breakpoint_detection)
fwrite( current_cache -> subjunc_res , sizeof(subjunc_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp);
}
}
}
assert(global_context -> bigtable_cache_file_fp == NULL);
return;
}
......@@ -194,102 +160,7 @@ bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_con
{
long long inner_pair_number = get_inner_pair(global_context, pair_number);
long long load_start_pair_no = inner_pair_number - inner_pair_number % global_context -> bigtable_chunked_fragments;
if(global_context -> bigtable_cache_file_fp){
if(global_context -> bigtable_cache_file_loaded_fragments_begin == -1 || inner_pair_number >= global_context -> bigtable_cache_file_loaded_fragments_begin + global_context -> bigtable_chunked_fragments || inner_pair_number < global_context -> bigtable_cache_file_loaded_fragments_begin)
{
SUBREADprintf("THREAD # %d WAITING FOR %llu for RETRIEVE %llu\n", thread_context? thread_context -> thread_id:-1, global_context -> bigtable_cache_file_loaded_fragments_begin, pair_number);
wait_occupied(global_context, global_context -> bigtable_cache_file_loaded_fragments_begin);
}
bigtable_lock(global_context);
if(global_context -> bigtable_cache_file_loaded_fragments_begin == -1 || inner_pair_number >= global_context -> bigtable_cache_file_loaded_fragments_begin + global_context -> bigtable_chunked_fragments || inner_pair_number < global_context -> bigtable_cache_file_loaded_fragments_begin)
{
long long load_end_pair_no = load_start_pair_no + global_context -> bigtable_chunked_fragments;
int rlen = -1;
// this function will see if there is data to write or not.
bigtable_write_thread_cache(global_context);
// load or extend the real file
if(load_start_pair_no < global_context -> bigtable_cache_file_fragments){
long long start_file_location = calc_file_location(load_start_pair_no);
int xk1, xk2;
//SUBREADprintf("READ_IN %lld\n", load_start_pair_no);
fseeko(global_context -> bigtable_cache_file_fp, start_file_location, SEEK_SET);
for(xk1 = 0; xk1 < global_context -> bigtable_chunked_fragments; xk1++)
{
for(xk2 = 0; xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++){
bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1* (1+global_context -> input_reads.is_paired_end_reads) + xk2;
rlen = fread( current_cache -> big_margin_data , sizeof(short) * 3 * global_context -> config.big_margin_record_size , 1, global_context -> bigtable_cache_file_fp );
if(rlen < 1){
SUBREADprintf("ERROR: cannot read margin\n");
return NULL;
}
rlen = fread( current_cache -> alignment_res , sizeof(mapping_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp );
if(rlen < 1){
SUBREADprintf("ERROR: cannot read margin\n");
return NULL;
}
if(global_context -> config.do_breakpoint_detection){
rlen = fread( current_cache -> subjunc_res , sizeof(subjunc_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp);
if(rlen < 1){
SUBREADprintf("ERROR: cannot read margin\n");
return NULL;
}
}
}
}
}else{
long long new_file_size = calc_file_location(load_end_pair_no);
//SUBREADprintf("FILE_TRUNCATE %lld\n", load_start_pair_no);
rlen = ftruncate(fileno(global_context -> bigtable_cache_file_fp), new_file_size);
if(rlen != 0){
SUBREADprintf("ERROR: cannot truncate file\n");
return NULL;
}
global_context -> bigtable_cache_file_fragments = load_end_pair_no;
int xk1, xk2;
for(xk1 = 0; xk1 < global_context -> bigtable_chunked_fragments; xk1++)
{
for(xk2 = 0; xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++)
{
bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1 * (1+global_context -> input_reads.is_paired_end_reads) + xk2;
memset( current_cache -> big_margin_data , 0 , sizeof(short) * 3 * global_context -> config.big_margin_record_size);
if(0 && xk1 < 10)
{
//SUBREADprintf("CACHEP_211: %p (%d from %llu)\n", current_cache, xk1, pair_number);
SUBREADprintf("NENSET_211: %p + %d\n", current_cache -> alignment_res, xk1 * (1+global_context -> input_reads.is_paired_end_reads) + xk2);
}
memset( current_cache -> alignment_res , 0, sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
if(0 && xk1 < 10)
{
SUBREADprintf("FINE_211: %p\n", current_cache -> alignment_res);
}
if(global_context -> config.do_breakpoint_detection)
memset( current_cache -> subjunc_res , 0, sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
}
}
}
global_context -> bigtable_cache_file_loaded_fragments_begin = load_start_pair_no;
global_context -> bigtable_dirty_data = 0;
}
bigtable_unlock(global_context);
}
if(global_context -> bigtable_cache_file_fp){
global_context -> bigtable_cache[inner_pair_number - load_start_pair_no].status = CACHE_STATUS_OCCUPIED;
}
bigtable_cached_result_t * ret_cache = global_context -> bigtable_cache + (inner_pair_number - load_start_pair_no)* (1+global_context -> input_reads.is_paired_end_reads) + is_second_read;
return ret_cache;
}
......@@ -313,36 +184,14 @@ unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_conte
void bigtable_release_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int commit_change){
if(global_context -> bigtable_cache_file_fp){
long long inner_pair_number = get_inner_pair(global_context, pair_number);
long long load_start_pair_no = inner_pair_number - inner_pair_number % global_context -> bigtable_chunked_fragments;
//SUBREADprintf("REAL RELEASE:%lld\n", inner_pair_number - load_start_pair_no);
global_context -> bigtable_cache[inner_pair_number - load_start_pair_no].status = CACHE_STATUS_RELEASED;
//SUBREADprintf("OCCUPYXX0 = %d\n", global_context -> bigtable_cache[0].status);
if(commit_change){
global_context -> bigtable_dirty_data=1;
}
}
assert(NULL == "File result cache is no longer used. Do not call this function!");
}
int finalise_bigtable_results(global_context_t * global_context){
if(global_context -> bigtable_cache_file_fp){
fclose(global_context -> bigtable_cache_file_fp);
char tmpfname[MAX_FILE_NAME_LENGTH+33];
sprintf(tmpfname, "%s-%02d-align.bin", global_context -> config.temp_file_prefix, 0);
unlink(tmpfname);
}
int x1;
for(x1 = 0; x1 < global_context -> bigtable_cache_size; x1++){
free(global_context -> bigtable_cache[x1].alignment_res);
if(global_context -> config.do_breakpoint_detection)
free(global_context -> bigtable_cache[x1].subjunc_res);
}
assert(global_context -> bigtable_cache_file_fp == NULL);
free(global_context -> bigtable_cache_malloc_ptr);
if(global_context -> config.do_breakpoint_detection) free(global_context -> bigtable_cache_malloc_ptr_junc);
free(global_context -> bigtable_cache);
return 0;
}
......
......@@ -160,41 +160,33 @@ int BINsearch_event(chromosome_event_t * event_space, int * event_ids, int is_sm
}
}
#define ANTI_SUPPORTING_READ_LIMIT 100
int anti_supporting_read_scan(global_context_t * global_context)
{
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
if(indel_context->total_events<1)return 0;
chromosome_event_t * event_space = indel_context -> event_space_dynamic;
int x1, * small_side_ordered_event_ids, * large_side_ordered_event_ids, cancelled_events=0, * cancelled_event_list = malloc(sizeof(int)*ANTI_SUPPORTING_READ_LIMIT), x2;
small_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
large_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
typedef struct{
int thread_id;
int block_start;
int block_end;
HashTable * result_tab;
int * small_side_ordered_event_ids, * large_side_ordered_event_ids;
chromosome_event_t * event_space;
global_context_t * global_context;
} AT_context_t;
for(x1=0; x1<indel_context->total_events; x1++)
{
small_side_ordered_event_ids[x1]=x1;
large_side_ordered_event_ids[x1]=x1;
}
void * sort_data[3];
sort_data[0] = small_side_ordered_event_ids;
sort_data[1] = event_space;
sort_data[2] = (void *)0xffff; // small_side_ordering
#define ANTI_SUPPORTING_READ_LIMIT 100
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
void * anti_support_thread_run(void * v_cont){
AT_context_t * atcont = v_cont;
sort_data[0] = large_side_ordered_event_ids;
sort_data[2] = NULL; // large_side_ordering
int * cancelled_event_list = malloc(sizeof(int)*ANTI_SUPPORTING_READ_LIMIT), x2;
int * small_side_ordered_event_ids = atcont -> small_side_ordered_event_ids, * large_side_ordered_event_ids = atcont -> large_side_ordered_event_ids;
global_context_t * global_context = atcont -> global_context;
chromosome_event_t * event_space = atcont -> event_space;
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
if(indel_context->total_events<1)return NULL;
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
int current_read_number, cancelled_events, x1;
int current_read_number;
for(current_read_number = 0; current_read_number < global_context -> processed_reads_in_chunk; current_read_number++)
for(current_read_number = atcont -> block_start; current_read_number < atcont -> block_end; current_read_number++)
{
int is_second_read;
for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
......@@ -228,8 +220,9 @@ int anti_supporting_read_scan(global_context_t * global_context)
if(event_body -> event_small_side <= coverage_start + 5) continue;
if(event_body -> event_small_side >= coverage_end - 5) continue;
event_body -> anti_supporting_reads ++;
//printf("OCT27-ANTISUP-READ @ %u has SMALL @ %u~%u , INDELS = %d, ASUP = %d\n", coverage_start - 1, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, event_body -> anti_supporting_reads);
long long cur_count = HashTableGet(atcont -> result_tab , NULL+small_side_ordered_event_ids[x1]+1 ) - NULL;
cur_count++;
HashTablePut( atcont -> result_tab , NULL+small_side_ordered_event_ids[x1]+1 , NULL+cur_count );
cancelled_event_list[cancelled_events++] = small_side_ordered_event_ids[x1];
}
......@@ -251,12 +244,83 @@ int anti_supporting_read_scan(global_context_t * global_context)
if(to_be_add){
// printf("OCT27-ANTISUP-READ @ %u has LARGE @ %u~%u, INDELS = %d, ASUP = %d\n", coverage_end, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, event_body -> anti_supporting_reads);
event_body -> anti_supporting_reads ++;
long long cur_count = HashTableGet(atcont -> result_tab , NULL+large_side_ordered_event_ids[x1]+1 ) - NULL;
cur_count++;
HashTablePut( atcont -> result_tab , NULL+large_side_ordered_event_ids[x1]+1 , NULL+cur_count );
}
}
}
}
}
free(cancelled_event_list);
return NULL;
}
void anti_support_add_count(void * ky, void * va, HashTable * tab){
chromosome_event_t * event_space = tab -> appendix1 ;
int eno = ky-NULL-1;
chromosome_event_t * event_body = event_space + eno;
event_body -> anti_supporting_reads+= (va - NULL);
}
int anti_supporting_read_scan(global_context_t * global_context)
{
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
if(indel_context->total_events<1)return 0;
chromosome_event_t * event_space = indel_context -> event_space_dynamic;
int x1, * small_side_ordered_event_ids, * large_side_ordered_event_ids;
small_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
large_side_ordered_event_ids = malloc(sizeof(int)*indel_context -> total_events);
for(x1=0; x1<indel_context->total_events; x1++) {
small_side_ordered_event_ids[x1]=x1;
large_side_ordered_event_ids[x1]=x1;
}
void * sort_data[3];
sort_data[0] = small_side_ordered_event_ids;
sort_data[1] = event_space;
sort_data[2] = (void *)0xffff; // small_side_ordering
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
sort_data[0] = large_side_ordered_event_ids;
sort_data[2] = NULL; // large_side_ordering
merge_sort(sort_data, indel_context->total_events, compare_event_sides, exchange_event_sides, merge_event_sides);
AT_context_t ATconts[64];
pthread_t AThreads[64];
int thread_no, last_end = 0;
for(thread_no=0; thread_no< global_context -> config.all_threads; thread_no++){
AT_context_t * atc = ATconts + thread_no;
atc -> thread_id = thread_no;
atc -> block_start = last_end;
atc -> block_end = last_end = global_context -> processed_reads_in_chunk / global_context -> config.all_threads * thread_no;
if(thread_no == global_context -> config.all_threads-1) atc -> block_end = global_context -> processed_reads_in_chunk;
atc -> global_context = global_context;
atc -> result_tab = HashTableCreate(200000);
atc -> small_side_ordered_event_ids = small_side_ordered_event_ids;
atc -> large_side_ordered_event_ids = large_side_ordered_event_ids;
atc -> event_space = event_space;
pthread_create(AThreads + thread_no , NULL, anti_support_thread_run, atc);
}
bigtable_release_result(global_context, NULL, current_read_number, 0);
for(thread_no=0; thread_no< global_context -> config.all_threads; thread_no++){
pthread_join(AThreads[thread_no], NULL);
ATconts[thread_no].result_tab -> appendix1 = event_space;
HashTableIteration( ATconts[thread_no].result_tab, anti_support_add_count );
HashTableDestroy(ATconts[thread_no].result_tab);
}
free(small_side_ordered_event_ids);
......@@ -2799,7 +2863,7 @@ int rectify_read_text(global_context_t * global_context, reassembly_by_voting_bl
unsigned int subread_int = 0, xk2;
for(xk2=0; xk2< global_context -> config.reassembly_subread_length; xk2++)
subread_int = (subread_int << 2) | base2int(next_read_txt[read_offset + xk2]);
gehash_go_q(index, subread_int , read_offset, read_len, 0, block_context -> vote_list_rectify, 22, 0, read_offset, 0, 0x7fffffff);
gehash_go_q(index, subread_int , read_offset, read_len, 0, block_context -> vote_list_rectify, 22, read_offset, 0, 0x7fffffff);
}
memset(block_context -> read_rectify_space, 0, MAX_READ_LENGTH * 4 * sizeof(short));
......@@ -2929,7 +2993,7 @@ int search_window_once(global_context_t * global_context, reassembly_by_voting_b
if(global_context -> config.reassembly_tolerable_voting)
gehash_go_q_tolerable(&block_context->voting_indexes[window_no], subread_int , read_offset, read_len, 0, block_context -> vote_list, 1, 22, 24, 0, read_offset, global_context -> config.reassembly_tolerable_voting, global_context -> config.reassembly_subread_length, 0, 0x7fffffff);
else
gehash_go_q(&block_context->voting_indexes[window_no], subread_int , read_offset, read_len, 0, block_context -> vote_list, 22, 0, read_offset, 0, 0x7fffffff);
gehash_go_q(&block_context->voting_indexes[window_no], subread_int , read_offset, read_len, 0, block_context -> vote_list,0,read_offset, 0, 0x7fffffff);
}
int WINDOW_SEARCH_TREE_WIDTH = 1;
......
......@@ -200,4 +200,5 @@ chromosome_event_t * local_add_indel_event(global_context_t * global_context, th
void print_indel_table(global_context_t * global_context);
int sort_junction_entry_table(global_context_t * global_context);
void mark_event_bitmap(unsigned char * bitmap, unsigned int pos);
int check_event_bitmap(unsigned char * bitmap, unsigned int pos);
#endif
......@@ -49,6 +49,9 @@ static struct option long_options[] =
{"reportPairedMultiBest", no_argument, 0, 0},
{"sv", no_argument, 0, 0},
{"longDel", no_argument, 0, 0},
{"exonAnnotationScreenOut", required_argument, 0, 0},
{"gtfFeature", required_argument, 0, 0},
{"gtfAttr", required_argument, 0, 0},
{"extraColumns", no_argument, 0, 0},
{"forcedPE", no_argument, 0, 0},
{"ignoreUnmapped", no_argument, 0, 0},
......@@ -596,6 +599,9 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.realignment_minimum_variant_distance = 1;
// global_context->config.max_indel_length = max(16, global_context->config.max_indel_length);
}
else if(strcmp("exonAnnotationScreenOut", long_options[option_index].name)==0){
strcpy(global_context->config.exon_annotation_file_screen_out, optarg);
}
else if(strcmp("minVoteCutoff", long_options[option_index].name)==0)
{
global_context->config.max_vote_number_cutoff = atoi(optarg);
......
......@@ -32,6 +32,7 @@ static struct option long_options[] =
{"junctionIns", required_argument, 0, 0},
{"multi", required_argument, 0, 'B'},
{"exonAnnotation", required_argument, 0, 'a'},
{"exonAnnotationScreenOut", required_argument, 0, 0},
{"exonAlias", required_argument, 0, 'A'},
{"exonFormat", required_argument, 0, 'F'},
{"gtfAttr", required_argument, 0, 0},
......@@ -113,13 +114,13 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs("# thresholds for mapping");
SUBREADputs("");
SUBREADputs(" -n <int> Number of selected subreads, 10 by default.");
SUBREADputs(" -n <int> Number of selected subreads, 14 by default.");
SUBREADputs("");
SUBREADputs(" -m <int> Consensus threshold for reporting a hit (minimal number of");
SUBREADputs(" subreads that map in consensus) . If paired-end, this gives");
SUBREADputs(" the consensus threshold for the anchor read (anchor read");
SUBREADputs(" receives more votes than the other read in the same pair).");
SUBREADputs(" 3 by default");
SUBREADputs(" 1 by default.");
SUBREADputs("");
SUBREADputs(" -p <int> Consensus threshold for the non- anchor read in a pair. 1 by");
SUBREADputs(" default.");
......@@ -629,6 +630,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.max_vote_number_cutoff = atoi(optarg);
}
else if(strcmp("exonAnnotationScreenOut", long_options[option_index].name)==0){
strcpy(global_context->config.exon_annotation_file_screen_out, optarg);
}
else if(strcmp("allJunctions", long_options[option_index].name)==0)
{
global_context->config.do_fusion_detection = 1;
......
This diff is collapsed.
This diff is collapsed.