Imported Upstream version 1.4.6-p5+dfsg

parent 91f65e51
@article{liao,
author={Liao, Y. and Smyth, G. K. and Shi, W.},
year={2013},
title={The Subread aligner: fast, accurate and scalable read mapping by seed-and-vote},
journal={Nucleic Acids Research},
volume={41},
issue={10},
pages={e108}
}
@article{TangNC2013,
author={Tang, K. W. and Alaei-Mahabadi, B. and Samuelsson, T. and Lindh, M. and Larsson, E.},
year={2013},
title={{The landscape of viral expression and host gene fusion and adaptation in human cancer}},
journal={Nature Communications},
volume={4},
pages={2513},
month={Oct},
doi={10.1038/ncomms3513}
}
@article{ManNI2013,
author={Man, K. and Miasari, M. and Shi, W. and Xin, A. and Henstridge, D. C. and Preston, S. and Pellegrini, M. and Belz, G. T. and Smyth, G. K. and Febbraio, M. A. and Nutt, S. L. and Kallies, A.},
year={2013},
title={{The transcription factor IRF4 is essential for TCR affinity-mediated metabolic programming and clonal expansion of T cells}},
journal={Nature Immunology},
note={Epub 2013 Sep 22},
doi={10.1038/ni.2710},
pages={}
}
@article{SpangenbergSCR2013,
author={Spangenberg, L. and Shigunov, P. and Abud, A. P. and Cofré, A. R. and Stimamiglio, M. A. and Kuligovski, C. and Zych, J. and Schittini, A. V. and Costa, A. D. and Rebelatto, C. K. and Brofman, P. R. and Goldenberg, S. and Correa, A. and Naya, H. and Dallagiovanna, B.},
year={2013},
title={{Polysome profiling shows extensive posttranscriptional regulation during human adipocyte stem cell differentiation into adipocytes}},
journal={Stem Cell Research},
volume={11},
pages={902-12}
}
@article{tang,
author={Tang, J. Z. and Carmichael, C. L. and Shi, W. and Metcalf, D. and Ng, A. P. and Hyland, C. D. and Jenkins, N. A. and Copeland, N. G. and Howell, V. M. and Zhao, Z. J. and Smyth, G. K. and Kile, B. T. and Alexander, W. S.},
year={2013},
title={{Transposon mutagenesis reveals cooperation of ETS family transcription factors with signaling pathways in erythro-megakaryocytic leukemia}},
journal={Proc Natl Acad Sci U S A},
volume={110},
pages={6091-6}
}
@article{ezh2,
author={Pal, B. and Bouras, T. and Shi, W. and Vaillant, F. and Sheridan, J. M. and Fu, N. and Breslin, K. and Jiang, K. and Ritchie, M. E. and Young, M. and Lindeman, G. J. and Smyth, G. K. and Visvader, J. E.},
year={2013},
title={{Global changes in the mammary epigenome are induced by hormonal cues and coordinated by Ezh2}},
journal={Cell Reports},
volume={3},
pages={411-26}
}
@article{fcounts,
author={Liao, Y. and Smyth, G. K. and Shi, W.},
year={2014},
title={{featureCounts: an efficient general-purpose program for assigning sequence reads to genomic features}},
journal={Bioinformatics},
volume={30},
issue={7},
pages={923-30}
}
@article{seqc,
author={SEQC/MAQC-III Consortium},
year={2014},
title={{A comprehensive assessment of RNA-seq accuracy, reproducibility and information content by the Sequencing Quality Control Consortium}},
journal={Nature Biotechnology},
volume={32},
issue={9},
pages={903-14}
}
@article{exactsnp,
author={Liao, Y. and Smyth, G. K. and Shi, W.},
year={},
title={{ExactSNP: an efficient and accurate SNP calling algorithm}},
journal={In preparation},
}
This diff is collapsed.
......@@ -23,11 +23,11 @@
#include "subread.h"
#include "HelperFunctions.h"
int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_Points, unsigned short * Section_Length)
int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
{
unsigned int tmp_int=0;
int cigar_cursor=0;
unsigned short read_cursor=0;
unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
unsigned int chromosome_cursor=0;
int ret=0;
......@@ -41,28 +41,32 @@ int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_P
}
else
{
if(ch == 'M' || ch == 'D')
{
if(ch == 'S')
read_cursor += tmp_int;
else if(ch == 'M') {
read_cursor += tmp_int;
current_section_chro_len += tmp_int;
chromosome_cursor += tmp_int;
}
else if(ch == 'N' || ch == 0)
{
if(ret <6)
} else if(ch == 'N' || ch == 'D' || ch=='I' || ch == 0) {
if('N' == ch)(*is_junction_read)=1;
if(ret < FC_CIGAR_PARSER_ITEMS)
{
if(read_cursor>0)
if(current_section_chro_len>0)
{
Staring_Points[ret] = chromosome_cursor - read_cursor;
Section_Length[ret] = read_cursor;
Section_Start_Chro_Pos[ret] = chromosome_cursor - current_section_chro_len;
Section_Start_Read_Pos[ret] = current_section_start_read_pos;
Section_Chro_Length[ret] = current_section_chro_len;
ret ++;
}
}
read_cursor = 0;
current_section_chro_len = 0;
if(ch == 'I') read_cursor += tmp_int;
else if(ch == 'N' || ch == 'D') chromosome_cursor += tmp_int;
current_section_start_read_pos = read_cursor;
if(ch == 'N') chromosome_cursor += tmp_int;
else break;
if(ch == 0) break;
}
//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, read_cursor);
//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, current_section_chro_len);
tmp_int = 0;
}
if(cigar_cursor>100) return -1;
......@@ -73,15 +77,18 @@ int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_P
void display_sections(char * CIGAR_Str)
{
unsigned int Staring_Points[6];
unsigned short Section_Length[6];
int retv = RSubread_parse_CIGAR_string(CIGAR_Str, Staring_Points, Section_Length);
int is_junc=0;
int Section_Start_Chro_Pos[FC_CIGAR_PARSER_ITEMS];
unsigned short Section_Start_Read_Pos[FC_CIGAR_PARSER_ITEMS];
unsigned short Section_Chro_Length[FC_CIGAR_PARSER_ITEMS];
int retv = RSubread_parse_CIGAR_string(CIGAR_Str, Section_Start_Chro_Pos, Section_Start_Read_Pos, Section_Chro_Length, &is_junc);
int x1;
SUBREADprintf("Cigar=%s ; Sections=%d\n", CIGAR_Str, retv);
for(x1=0; x1<retv; x1++)
{
SUBREADprintf(" Section #%d: offset=%u length=%u\n",x1, Staring_Points[x1], Section_Length[x1]);
SUBREADprintf(" Section #%d: chro_offset=%d, read_offset=%u length=%u\n",x1, Section_Start_Chro_Pos[x1], Section_Start_Read_Pos[x1], Section_Chro_Length[x1]);
}
SUBREADprintf("\n");
......@@ -331,7 +338,7 @@ void main()
void testi_helper_1_main()
#endif
{
hpl_test2_func();
hpl_test1_func();
}
char *str_replace(char *orig, char *rep, char *with) {
......
......@@ -29,7 +29,7 @@
// This function returns the number of sections found in the CIGAR string. It returns -1 if the CIGAR string cannot be parsed.
int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_Points, unsigned short * Section_Length);
int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read);
// This function tries to find the attribute value of a given attribute name from the extra column string in GTF/GFF.
......
......@@ -1854,8 +1854,8 @@ int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t *
sprintf(piece_cigar+strlen(piece_cigar), "%d%c", abs(event_after->indel_length), event_after->indel_length>0?'D':'I');
else if(event_after -> event_type == CHRO_EVENT_TYPE_JUNCTION||event_after -> event_type == CHRO_EVENT_TYPE_FUSION)
{
char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
//char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
//if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
// the distance in CIGAR is the NEXT UNWANTED BASE of piece#1 to the FIRST WANTED BASE in piece#2
int delta_one ;
......@@ -1880,8 +1880,24 @@ int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t *
delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
}
}
char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
long long int movement = event_after -> event_large_side;
movement -= event_after -> event_small_side - delta_one;
if(1){
if(jump_mode == 'B' && movement < 0){
movement = - movement;
jump_mode = 'N';
}else if(jump_mode == 'N' && movement < 0){
movement = - movement;
jump_mode = 'B';
}
}
if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
sprintf(piece_cigar+strlen(piece_cigar), "%u%c", event_after -> event_large_side - event_after -> event_small_side + delta_one, jump_mode);
sprintf(piece_cigar+strlen(piece_cigar), "%u%c", (int)movement, jump_mode);
if(event_after -> indel_at_junction) sprintf(piece_cigar+strlen(piece_cigar), "%dI", event_after -> indel_at_junction);
is_junction_read ++;
}
......@@ -1928,7 +1944,7 @@ int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t *
//#warning " ========== COMMENT THIS LINE !! ========="
//if(explain_context -> pair_number == 999999)
if(0 && memcmp(explain_context -> read_name, TTTSNAME, 26)==0)
if(0 && memcmp(explain_context -> read_name, "H7TVLADXX140423:2:1112:17883:23072", 32)==0)
printf("%s : POS=%u\tCIGAR=%s\tMM=%d > %d?\tVOTE=%d > %0.2f x %d ?\tQUAL=%d\tBRNO=%d\n", explain_context -> read_name, final_position , tmp_cigar, mismatch_bases, applied_mismatch, result -> selected_votes, global_context -> config.minimum_exonic_subread_fraction,result-> used_subreads_in_vote, final_qual, explain_context -> best_read_id);
if(mismatch_bases <= applied_mismatch && is_exonic_read_fraction_OK)
......
......@@ -10,7 +10,7 @@
#include "input-files.h"
#include "hashtable.h"
#define COVERAGE_MAX_INT 254
#define COVERAGE_MAX_INT 0x7ffffff0
unsigned long long all_counted;
typedef unsigned int coverage_bin_entry_t;
int is_BAM_input = 0;
......@@ -28,9 +28,10 @@ static struct option cov_calc_long_options[] =
void calcCount_usage()
{
SUBREADprintf("\ncoveageCount v%s\n\n", SUBREAD_VERSION);
SUBREADprintf("Counting the coverage of mapped reads at each location on the entire reference genome.\n\n");
SUBREADprintf("./ncoveageCount -i <sam_bam_input> -o <output_prefix>\n\n");
SUBREADprintf("\ncoverageCount v%s\n\n", SUBREAD_VERSION);
SUBREADprintf("This utility program counts the coverage of mapped reads at each location on the entire reference genome. It generates a number of binary files, each corresponding to a chromosome that is listed on the header of the input SAM or BAM file. Each of the binary file consists of many 4-byte integers (little-endian order), indicating the number of reads spanning each location on the corresponded chromosome; the file offset in bytes is calculated by the chromosomal location (zero-based) times four.\n\n");
SUBREADprintf("./coverageCount -i <sam or bam file> -o <output_prefix>\n\n");
SUBREADputs("");
}
void add_chro(char *sam_h)
......@@ -126,10 +127,11 @@ int covCalc()
}
else
{
unsigned int Staring_Points[6];
unsigned short Section_Lengths[6];
int Staring_Points[FC_CIGAR_PARSER_ITEMS];
unsigned short Staring_Read_Points[FC_CIGAR_PARSER_ITEMS];
unsigned short Section_Lengths[FC_CIGAR_PARSER_ITEMS];
int flags=0, x1;
int flags=0, x1, is_junc = 0;
char cigar_str[200];
char chro[200];
unsigned int pos = 0;
......@@ -148,7 +150,7 @@ int covCalc()
coverage_bin_entry_t * chrbin = (coverage_bin_entry_t*) bin_entry[0];
unsigned int chrlen = (void *)( bin_entry[0]) - NULL;
int cigar_sections = RSubread_parse_CIGAR_string(cigar_str, Staring_Points, Section_Lengths);
int cigar_sections = RSubread_parse_CIGAR_string(cigar_str, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
for(x1 = 0; x1 < cigar_sections; x1++)
{
unsigned int x2;
......
SUBREAD_VERSION="1.4.6-p4"
SUBREAD_VERSION="1.4.6-p5"
STATIC_MAKE=
#STATIC_MAKE= -static
This diff is collapsed.
......@@ -66,6 +66,7 @@
#define MAX_INDEL_SECTIONS 7
//#define XBIG_MARGIN_RECORD_SIZE 24
#define MAX_INSERTION_LENGTH 200
#define FC_CIGAR_PARSER_ITEMS 9
//#define BASE_BLOCK_LENGTH 15000000
//#define NEED_SUBREAD_STATISTIC
......
# Program:featureCounts v1.4.6-p4; Command:"../../bin/featureCounts" "-a" "data/test-minimum.GTF" "-o" "result/test-minimum.FC" "data/test-minimum.sam"
Geneid Chr Start End Strand Length data/test-minimum.sam
simu_gene1 chr3;chr3;chr3 100;20000;40000 10000;30000;89000 +;+;+ 68903 15
simu_gene2 chr3;chr3 100010;102000 101000;131000 +;+ 29992 4
simu_gene3 chr3;chr3;chr3;chr3 500010;502000;504000;600000 501000;503000;529000;669000 -;-;-;- 95994 10
simu_gene4 chr3;chr3;chr3 602000;672000;702000 631000;699000;719000 +;+;+ 73003 2
simu_gene5 chr4;chr4;chr4;chr4 20000;120000;200000;220000 100000;190000;210000;300000 -;-;-;- 240004 74
simu_gene6 chr4;chr4 420000;500000 490000;560000 -;- 130002 30
simu_gene7 chr5;chr5;chr5 120000;500000;970000 490000;960000;1000000 -;-;- 860003 254
Status data/test-minimum.sam
Assigned 389
Unassigned_Ambiguity 2
Unassigned_MultiMapping 191
Unassigned_NoFeatures 416
Unassigned_Unmapped 0
Unassigned_MappingQuality 0
Unassigned_FragmentLength 0
Unassigned_Chimera 0
Unassigned_Secondary 0
Unassigned_Nonjunction 0
Unassigned_Duplicate 0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment