Imported Upstream version 1.5.0-p3+dfsg

parent 8b7078e3
......@@ -35,9 +35,9 @@
\begin{center}
{\Huge\bf Subread/Rsubread Users Guide}\\
\vspace{1 cm}
{\centering\large Subread v1.5.0-p2/Rsubread v1.20.5\\}
{\centering\large Subread v1.5.0-p3/Rsubread v1.22.1\\}
\vspace{1 cm}
\centering 11 April 2016\\
\centering 27 May 2016\\
\vspace{5 cm}
\Large Wei Shi and Yang Liao\\
\vspace{1 cm}
......@@ -764,7 +764,7 @@ Note that, when counting at the meta-feature level, reads that overlap multiple
\subsection{In-built annotations}
In-built gene annotations for genomes \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
In-built gene annotations for genomes \emph{hg38}, \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
These annotations were downloaded from NCBI RefSeq database and then adapted by merging overlapping exons from the same gene to form a set of disjoint exons for each gene.
Genes with the same Entrez gene identifiers were also merged into one gene.
......
......@@ -20,6 +20,7 @@
#include <ctype.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#ifdef MACOS
......@@ -844,6 +845,8 @@ int mac_str(char * str_buff)
}
}
close(sock);
unsigned char mac_address[6];
if (success){
......@@ -886,5 +889,93 @@ int mac_or_rand_str(char * str_buff){
return mac_str(str_buff) && rand_str(str_buff) && mathrand_str(str_buff);
}
#define PI_LONG 3.1415926535897932384626434L
long double fast_fractorial(unsigned int x, long double * buff, int buflen){
if(x<2) return 0;
if(buff != NULL && x < buflen && buff[x]!=0){
return buff[x];
}
long double ret;
if(x<50){
int x1;
ret =0.L;
for(x1 = 2 ; x1 <= x; x1++) ret += logl((long double)(x1));
}else{
ret = logl(x)*1.0L*x - 1.0L*x + 0.5L * logl(2.0L*PI_LONG* x) + 1.L/(12.L*x) - 1.L/(360.L* x*x*x) + 1.L/(1260.L* x*x*x*x*x) - 1.L/(1680.L*x*x*x*x*x*x*x);
}
if(buff != NULL && x < buflen) buff[x]=ret;
return ret;
}
#define BUFF_4D 36
long double fast_freq( unsigned int tab[2][2] , long double * buff, int buflen){
int x0 = -1;
if(buff && buflen > BUFF_4D * BUFF_4D * BUFF_4D * BUFF_4D && tab[0][0] < BUFF_4D && tab[0][1] < BUFF_4D && tab[1][0] < BUFF_4D && tab[1][1] < BUFF_4D ){
x0 = buflen + tab[0][0]* BUFF_4D * BUFF_4D * BUFF_4D + tab[0][1] * BUFF_4D * BUFF_4D + tab[1][0] * BUFF_4D + tab[1][1];
if(buff[x0]!=0) return buff[x0];
}
long double ret = fast_fractorial(tab[0][0]+tab[0][1],buff,buflen)+fast_fractorial(tab[1][0]+tab[1][1],buff,buflen) +
fast_fractorial(tab[0][0]+tab[1][0],buff,buflen)+fast_fractorial(tab[0][1]+tab[1][1],buff,buflen) -
fast_fractorial(tab[0][0],buff,buflen) - fast_fractorial(tab[0][1],buff,buflen) -
fast_fractorial(tab[1][0],buff,buflen) - fast_fractorial(tab[1][1],buff,buflen) -
fast_fractorial(tab[0][0] + tab[0][1] + tab[1][0] + tab[1][1],buff,buflen);
if(x0>=0) buff[x0] = ret;
return ret;
}
double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * buff, int buflen){
unsigned int tab[2][2];
long double P0, P_delta, ret;
tab[0][0]=a;
tab[0][1]=b;
tab[1][0]=c;
tab[1][1]=d;
int x_a, y_a, x_min=-1, y_min=-1;
unsigned int min_a = 0xffffffff;
for(x_a = 0; x_a < 2; x_a++)
for(y_a = 0; y_a < 2; y_a++){
if(tab[x_a][y_a]<min_a){
min_a = tab[x_a][y_a];
x_min = x_a;
y_min = y_a;
}
}
P_delta = fast_freq(tab, buff, buflen);
P0 = ret = exp(P_delta);
//printf("P0=%LG\n", P0);
if(min_a>0){
unsigned int Qa = min_a;
unsigned int Qb = tab[x_min][!y_min];
unsigned int Qc = tab[!x_min][y_min];
unsigned int Qd = tab[!x_min][!y_min];
for(; ; ){
min_a --;
Qb++;Qc++;
P_delta -= logl(Qb*Qc);
P_delta += logl(Qa*Qd);
Qa--;Qd--;
ret += expl(P_delta);
// printf("%LG %LG %LG\n", ret, 1 - (ret - P0), expl(P_delta));
if(min_a < 1) break;
}
}
double ret1 = ret, ret2 = 1 - (ret - P0);
if(min(ret1, ret2) < 0){
return 0.0;
}
return min(ret1, ret2) ;
}
......@@ -69,4 +69,6 @@ int strcmp_number(char * s1, char * s2);
unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar);
unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar);
int mac_or_rand_str(char * char_14);
double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * frac_buffer, int buffer_size);
#endif
......@@ -6,7 +6,6 @@ all:
@echo " For building subread in Linux, please run ' make -f Makefile.Linux '."
@echo " For building subread in Mac OS X, please run ' make -f Makefile.MacOS '."
@echo " For building subread in FreeBSD, please run ' gmake -f Makefile.FreeBSD '."
@echo " For building subread in Oracle Solaris or OpenSolaris, please run ' gmake -f Makefile.SunOS '."
@echo
@echo " The default compiler is gcc; you may change it by editing the Makefiles for platforms."
@echo
......
......@@ -99,7 +99,7 @@ struct SNP_Calling_Parameters{
#define PRECALCULATE_FACTORIAL 2000000
double * precalculated_factorial;// [PRECALCULATE_FACTORIAL];
long double * precalculated_factorial;// [PRECALCULATE_FACTORIAL];
double factorial_float_real(int a)
{
......@@ -113,7 +113,6 @@ double factorial_float_real(int a)
double factorial_float(int a)
{
if(a<PRECALCULATE_FACTORIAL && (precalculated_factorial[a]!=0.))
return precalculated_factorial[a];
else
......@@ -154,25 +153,43 @@ double fisher_exact_test(int a, int b, int c, int d)
//printf("FET: %d %d %d %d\n", a, b, c, d);
if (a * d > b * c) {
a = a + b; b = a - b; a = a - b;
c = c + d; d = c - d; c = c - d;
}
if (a > d) { a = a + d; d = a - d; a = a - d; }
if (b > c) { b = b + c; c = b - c; b = b - c; }
if(1){
double ret = fast_fisher_test_one_side(a,b,c,d, precalculated_factorial, PRECALCULATE_FACTORIAL);
//SUBREADprintf("FISHER_RES %d %d %d %d %.9f %.9f\n", a,b,c,d, ret, log(ret));
return ret;
}else{
int AA=a, BB=b, CC=c, DD=d;
if (a * d > b * c) {
a = a + b; b = a - b; a = a - b;
c = c + d; d = c - d; c = c - d;
}
double p_sum = 0.0;
if (a > d) { a = a + d; d = a - d; a = a - d; }
if (b > c) { b = b + c; c = b - c; b = b - c; }
double p = fisherSub(a, b, c, d);
while (a >= 0) {
p_sum += p;
if (a == 0) break;
--a; ++b; ++c; --d;
p = fisherSub(a, b, c, d);
}
double p_sum = 0.0;
double p = fisherSub(a, b, c, d);
while (a >= 0) {
p_sum += p;
if (a == 0) break;
--a; ++b; ++c; --d;
p = fisherSub(a, b, c, d);
}
return p_sum;
if(1){
// DON'T PUT IT HERE!
double r1 = fast_fisher_test_one_side(AA,BB,CC,DD, NULL, PRECALCULATE_FACTORIAL);
if(abs(r1-p_sum) / max(r1,p_sum) >= 0.01){
printf("BADR: FAST = %.7f != JAVA %.7f, %d %d %d %d\n", log(r1), log(p_sum),AA,BB,CC,DD);
}else{
printf("GOODR: FAST = %.7f ~= JAVA %.7f, %d %d %d %d\n", log(r1), log(p_sum),AA,BB,CC,DD);
}
}
return p_sum;
}
}
unsigned int fisher_test_size;
......@@ -508,7 +525,8 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
}
float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
//SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
if(0 && flanking_matched > 10000 && p_middle < 1E-5)
SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
if(all_result_needed || ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16))
snp_fisher_raw [i] = p_middle;
else snp_fisher_raw [i] = -999;
......@@ -1296,6 +1314,7 @@ void EXSNP_SIGINT_hook(int param)
unlink(del_name);
}
}
closedir(d);
}
}
......@@ -1328,8 +1347,8 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
fisher_test_size = 0;
precalculated_factorial = (double*)malloc(sizeof(double)*PRECALCULATE_FACTORIAL);
for(i=0; i<PRECALCULATE_FACTORIAL; i++)
precalculated_factorial = (long double*)malloc(sizeof(long double)*PRECALCULATE_FACTORIAL * 2);
for(i=0; i<PRECALCULATE_FACTORIAL * 2; i++)
precalculated_factorial[i] = 0.;
......@@ -1493,7 +1512,7 @@ void print_usage_snp(char * myname)
SUBREADputs(" location must have (ie. the minimum coverage). 1 by default.");
SUBREADputs("");
SUBREADputs(" -x <int> Specify the maximum number of mapped reads a SNP-containing");
SUBREADputs(" location have have. 3000 by default. Any location having more than");
SUBREADputs(" location have have. 1000 by default. Any location having more than");
SUBREADputs(" the threshold number of reads will not be considered for SNP");
SUBREADputs(" calling. This option is useful for removing PCR artefacts.");
SUBREADputs("");
......@@ -1567,7 +1586,7 @@ int main_snp_calling_test(int argc,char ** argv)
parameters.supporting_read_rate = 0.;
parameters.min_supporting_read_number = 1;
parameters.min_alternative_read_number = 1;
parameters.max_supporting_read_number = 3000;
parameters.max_supporting_read_number = 1000;
parameters.neighbour_filter_testlen = -1;
parameters.neighbour_filter_rate = 0.000000001;
parameters.min_phred_score = 13;
......
......@@ -1752,7 +1752,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
unsigned int head_indel_left_edge = head_indel_pos + current_result->selected_position - 1;
//head_indel_left_edge -= max(0, head_indel_movement);
if(head_indel_left_edge>=0 && abs(head_indel_movement)<=global_context -> config.max_indel_length)
if(abs(head_indel_movement)<=global_context -> config.max_indel_length)
{
local_add_indel_event(global_context, thread_context, event_table, read_text + head_indel_pos, head_indel_left_edge, head_indel_movement, 1, 1, 0);
mark_gapped_read(current_result);
......@@ -2126,7 +2126,7 @@ int finalise_db_graph(global_context_t * global_context, reassembly_block_contex
else
{
int test_next;
int all_reads;
int all_reads = 0;
int ignored_moves=0;
int max_move_reads=0;
unsigned long long int previous_key = current_key<<2;
......@@ -3886,6 +3886,7 @@ void COREMAIN_SIGINT_hook(int param)
unlink(del_name);
}
}
closedir(d);
}
}
......
......@@ -1221,34 +1221,37 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
{
chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens);
int xk1;
r->chimeric_sections = chimeric_sections;
strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
for(xk1=1; xk1<chimeric_sections; xk1++)
{
int chimeric_pos;
char * chimaric_chr;
strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
unsigned int vitual_head_pos = r->out_poses[xk1];
char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
if(chimeric_sections > 0){
int xk1;
r->chimeric_sections = chimeric_sections;
strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
for(xk1=1; xk1<chimeric_sections; xk1++)
{
int chimeric_pos;
char * chimaric_chr;
strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
unsigned int vitual_head_pos = r->out_poses[xk1];
char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
//if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
//if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
{
int soft_clipping_movement = 0;
soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
assert(chimaric_chr);
sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
{
int soft_clipping_movement = 0;
soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
assert(chimaric_chr);
sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
}
else is_r_OK = 0;
}
else is_r_OK = 0;
}
r->linear_position = r->out_poses[0];
r->strand = r->out_strands[0]=='-';
r->linear_position = r->out_poses[0];
r->strand = r->out_strands[0]=='-';
strcpy(r->cigar , r->out_cigars[0]);
strcpy(r->cigar , r->out_cigars[0]);
}else is_r_OK = 0;
}
r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
if(is_r_OK)
r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
}
if(is_r_OK)
......@@ -4189,6 +4192,14 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
int curr_offset, new_offset;
locate_gene_position_max(current_perfect_cursor, &global_context -> chromosome_table, & curr_chr, & curr_offset, NULL, NULL, 1);
locate_gene_position_max(jummped_location , &global_context -> chromosome_table, & new_chr, & new_offset, NULL, NULL, 1);
if( curr_chr == NULL || new_chr == NULL ){
/*
char outpos[100];
absoffset_to_posstr(global_context, sel_pos + 1, outpos);
SUBREADprintf("Wrong CIGAR: mapped to %s, CIGAR=%s\n", outpos , in_cigar);
*/
return -1;
}
assert(curr_chr);
assert(new_chr);
is_chro_jump = (curr_chr != new_chr);
......
This diff is collapsed.
......@@ -36,6 +36,7 @@
#define GENE_INPUT_SAM_PAIR_2 95
#define MIN_FILE_POINTERS_ALLOWED 50
#define FILE_TYPE_SAM 50
#define FILE_TYPE_BAM 500
......@@ -120,6 +121,8 @@ typedef struct {
int is_single_end_mode;
int force_do_not_sort;
int is_finished;
int merge_level_finished;
int max_file_open_number;
subread_lock_t input_fp_lock;
subread_lock_t output_header_lock;
subread_lock_t unsorted_notification_lock;
......@@ -303,4 +306,5 @@ int SAM_pairer_multi_thread_header (void * pairer_vp, int thread_no, int is_text
int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_threads, int has_dummy , int BAM_output, int BAM_compression_level, char * out_file);
void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) ;
int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value);
int SAM_pairer_warning_file_open_limit();
#endif
SUBREAD_VERSION_BASE=1.5.0-p2
SUBREAD_VERSION_BASE=1.5.0-p3
SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y")
SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)"
SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)"
......
......@@ -109,6 +109,7 @@ void PROPMAPPED_SIGINT_hook(int param)
unlink(del_name);
}
}
closedir(d);
}
}
......
......@@ -936,14 +936,14 @@ int load_feature_info(fc_thread_global_context_t *global_context, const char * a
ret_features[xk1].chro_name_pos_delta = chro_name_pos - ret_features[xk1].feature_name_pos;
ret_features[xk1].start = atoi( start_ptr );// start
if(ret_features[xk1].start<0 || ret_features[xk1].start>0x7fffffff)
if(ret_features[xk1].start>0x7fffffff)
{
ret_features[xk1].start = 0;
print_in_box(80,0,0,"WARNING the %d-th line has a negative chro coordinate.", lineno);
}
ret_features[xk1].end = atoi( end_ptr );//end
if(ret_features[xk1].end<0 || ret_features[xk1].end>0x7fffffff)
if(ret_features[xk1].end>0x7fffffff)
{
ret_features[xk1].end = 0;
print_in_box(80,0,0,"WARNING the %d-th line has a negative chro coordinate.", lineno);
......@@ -4145,8 +4145,7 @@ int readSummary(int argc,char *argv[]){
else isRestrictlyNoOvelrapping = 0;
if(SAM_pairer_warning_file_open_limit()) return -1;
fc_thread_global_context_t global_context;
......
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "subread.h"
#include "HelperFunctions.h"
double factorial_float(int a)
{
double ret = 0;
while(a)
ret += log(a--);
return ret;
}
double fisherSub(int a, int b, int c, int d)
{
double ret = factorial_float(a+b) + factorial_float(c+d) + factorial_float(a+c) + factorial_float(b+d) ;
ret -= factorial_float(a) + factorial_float(b) + factorial_float(c) + factorial_float(d) + factorial_float(a+b+c+d);
return pow(2.71828183, ret);
}
/**
* See HELP string or run with no arguments for usage.
* <p>
* The code used to calculate a Fisher p-value comes originally from a
* <a href="http://infofarm.affrc.go.jp/~kadasowa/fishertest.htm">JavaScript program</a>
* by T. Kadosawa (kadosawa@niaes.affrc.go.jp).
* Retrieved from http://www.users.zetnet.co.uk/hopwood/tools/StatTests.java on 3/Jul/2012
*
* @author David Hopwood
* @date 2000/04/23
*/
double fisher_exact_test(int a, int b, int c, int d)
{
if (a * d > b * c) {
a = a + b; b = a - b; a = a - b;
c = c + d; d = c - d; c = c - d;
}
if (a > d) { a = a + d; d = a - d; a = a - d; }
if (b > c) { b = b + c; c = b - c; b = b - c; }
double p_sum = 0.0;
double p = fisherSub(a, b, c, d);
while (a >= 0) {
p_sum += p;
if (a == 0) break;
--a; ++b; ++c; --d;
p = fisherSub(a, b, c, d);
}
return p_sum;
}
long double fastfact(int x){
return logl(x)*x - x + 0.5 * logl(2*M_PI* x) + 1/(12.*x) - 1./(360.* x*x*x) + 1./(1260.* x*x*x*x*x) - 1./(1680.*x*x*x*x*x*x*x);// + (x>60?0:(1./(1188.*x*x*x*x*x*x*x*x*x ) ));
}
main(){
unsigned int a = 10 , c = 11,
b = 11 , d = 5000;
double fisher, fisher_old;
fisher = fast_fisher_test_one_side(a,b,c,d, NULL, 0);
fisher_old = fisher_exact_test(a,b,c,d);
printf("Log fisher = %.7f ; Old fisher = %.7f\n", log(fisher),log(fisher_old));
long double x1 = 1E-19L + 1E-20L;
long double x2 = 1L - expl(logl(0.5L) + logl(2.0L));
printf("New Vals: x1=%LG, x2=%LG\n", x1,x2);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment