Skip to content
Commits on Source (6)
subread (1.6.1+dfsg-1) unstable; urgency=medium
[ Steffen Möller ]
* Added new d/u/metadata file
[ Alexandre Mestiashvili ]
* New upstream version 1.6.1+dfsg
* Update d/control, bump Policy to 4.1.3, fix Vcs-* fields to point to salsa
* Refresh patches
* Update copyright years
-- Alexandre Mestiashvili <mestia@debian.org> Wed, 04 Apr 2018 17:04:57 +0000
subread (1.6.0+dfsg-1) unstable; urgency=medium
* Update d/watch to version 4
......
......@@ -9,9 +9,9 @@ Build-Depends: bc,
help2man,
python,
zlib1g-dev
Standards-Version: 4.1.1
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/subread.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/subread.git
Standards-Version: 4.1.3
Vcs-Browser: https://salsa.debian.org/med-team/subread
Vcs-Git: https://salsa.debian.org/med-team/subread.git
Homepage: http://sourceforge.net/projects/subread/
Package: subread
......
......@@ -4,11 +4,11 @@ Source: http://sourceforge.net/projects/subread/files/
Files-Excluded: doc/*.pdf
Files: *
Copyright: 2011-2015, Dr Yang Liao, Dr Wei Shi
Copyright: 2011-2018, Dr Yang Liao, Dr Wei Shi
License: GPL-3.0+
Files: debian/*
Copyright: 2015 Alexandre Mestiashvili <alex@biotec.tu-dresden.de>
Copyright: 2018 Alexandre Mestiashvili <alex@biotec.tu-dresden.de>
License: GPL-3.0+
License: GPL-3.0+
......
......@@ -23,12 +23,12 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib
--- subread.orig/src/longread-mapping/Makefile
+++ subread/src/longread-mapping/Makefile
@@ -2,10 +2,10 @@
include make.version
--- subread.orig/src/longread-one/Makefile
+++ subread/src/longread-one/Makefile
@@ -1,10 +1,10 @@
OPT_LEVEL = 3
include ../makefile.version
include make.version
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\"
-LDFLAGS = -lpthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
+CCFLAGS += -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\"
......@@ -37,5 +37,5 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
-CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
+CC = ${CC_EXEC} ${CFLAGS} ${CPPFLAGS} ${CCFLAGS} -fmessage-length=0 -ggdb
ALL_LIBS=LRMsorted-hashtable LRMbase-index LRMchro-event LRMhelper seek-zlib LRMfile-io hashtable
ALL_LIBS=LRMsorted-hashtable LRMbase-index LRMchro-event LRMhelper LRMseek-zlib LRMfile-io LRMhashtable
......@@ -2,7 +2,7 @@ Subject: Fix ftbfs on kfreebsd
From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/HelperFunctions.c
+++ subread/src/HelperFunctions.c
@@ -773,6 +773,9 @@
@@ -777,6 +777,9 @@
#if defined(FREEBSD) || defined(__MINGW32__)
return 1;
#else
......@@ -12,7 +12,7 @@ From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
#ifdef MACOS
int mib[6], x1, ret = 1;
size_t len;
@@ -863,6 +866,7 @@
@@ -867,6 +870,7 @@
return 1;
#endif
#endif
......
......@@ -2,7 +2,7 @@ Description: Fix typo
Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/sambam-file.c
+++ subread/src/sambam-file.c
@@ -164,7 +164,7 @@
@@ -186,7 +186,7 @@
return "MIDNSHP=X"[ch];
else
{
......
......@@ -36,9 +36,9 @@
\begin{center}
{\Huge\bf Subread/Rsubread Users Guide}\\
\vspace{1 cm}
{\centering\large Subread v1.6.0/Rsubread v1.28.0\\}
{\centering\large Subread v1.6.1/Rsubread v1.29.1\\}
\vspace{1 cm}
\centering 14 Nov 2017\\
\centering 21 March 2018\\
\vspace{5 cm}
\Large Wei Shi and Yang Liao\\
\vspace{1 cm}
......@@ -48,7 +48,7 @@ The Walter and Eliza Hall Institute of Medical Research\\
The University of Melbourne\\
Melbourne, Australia\\}
\vspace{7 cm}
\centering Copyright \small{\copyright} 2011 - 2017\\
\centering Copyright \small{\copyright} 2011 - 2018\\
\end{center}
\end{titlepage}
......@@ -58,7 +58,7 @@ Melbourne, Australia\\}
\chapter{Introduction}
The Subread/Rsubread packages comprise a suite of high-performance software programs for processing next-generation sequencing data.
Included in these packages are \code{Subread} aligner, \code{Subjunc} aligner, \code{Subindel} long indel detection program, \code{featureCounts} read quantification program, \code{exactSNP} SNP calling program and other utility programs.
Included in these packages are \code{Subread} aligner, \code{Subjunc} aligner, \code{Sublong} long-read aligner, \code{Subindel} long indel detection program, \code{featureCounts} read quantification program, \code{exactSNP} SNP calling program and other utility programs.
This document provides a detailed description to the programs included in the packages.
\code{Subread} and \code{Subjunc} aligners adopt a mapping paradigm called ``seed-and-vote'' \cite{liao}.
......@@ -426,7 +426,7 @@ Arguments used by \code{sublong} only are marked with $^{***}$.
\hline
Arguments & Description \\
\hline
-a $<string>$\newline (\code{useAnnotation, annot.inbuilt, annot.ext}) & Name of a gene annotation file that includes chromosomal coordinates of exons from each gene. GTF/GFF format by default. See -F option for supported formats. Users may use the inbuilt annotations included in this package (SAF format) for human and mouse data. Exon-exon junctions are inferred by connecting each pair of neighboring exons from the same gene.\\
-a $<string>$\newline (\code{useAnnotation, annot.inbuilt, annot.ext}) & Name of a gene annotation file that includes chromosomal coordinates of exons from each gene. GTF/GFF format by default. See -F option for supported formats. Users may use the inbuilt annotations included in this package (SAF format) for human and mouse data. Exon-exon junctions are inferred by connecting each pair of neighboring exons from the same gene. Gzipped file is accepted. \\
\hline
-A $<string>$\newline (\code{chrAliases}) & Name of a comma-delimited text file that includes aliases of chromosome names. This file should contain two columns. First column contains names of chromosomes included in the SAF or GTF annotation and second column contains corresponding names of chromosomes in the reference genome. No column headers should be provided. Also note that chromosome names are case sensitive. This file can be used to match chromosome names between the annotation and the reference genome.\\
\hline
......@@ -440,7 +440,7 @@ Arguments & Description \\
\hline
-F $<string>$ \newline (\code{isGTF}) & Specify format of the provided annotation file. Acceptable formats include `GTF' (or compatible GFF format) and `SAF'. Default format in SourceForge {\Subread} is `GTF'. Default format in {\Rsubread} is `SAF'. \\
\hline
-i $<string> \newline (\code{index}) $ & Specify the base name of the index. Full index (not gapped index) must be provided for \code{sublong} aligner, ie. `-F' option must be specified when running \code{subread-buildindex} for index building.\\
-i $<string> \newline (\code{index}) $ & Specify the base name of the index. The index used by \code{sublong} aligner must be a full index and has only one block, ie. `-F' and `-B' options must be specified when building index with \code{subread-buildindex}.\\
\hline
-I $<int>$ \newline (\code{indels}) & Specify the number of INDEL bases allowed in the mapping. 5 by default. Indels of up to 200bp long can be detected.\\
\hline
......@@ -705,11 +705,8 @@ The {\featureCounts} program can be readily used for summarizing reads to miRNA
\chapter{Mapping long sequence reads}
\section{Short description}
We developed a new long-read aligner called {\Sublong}, which is also based on the seed-and-vote mapping strategy.
{\Sublong} is an order of magnitude faster than existing long-read aligners.
Our simulation results also show that {\Sublong} performs better in mapping accuracy.
We have developed a new long-read aligner called {\Sublong}, which is also based on the seed-and-vote mapping strategy.
Parameters of {\Sublong} program can be found in Table 2.
......@@ -847,6 +844,10 @@ Users can specify the `-O' option (set \code{allowMultiOverlap} to \code{TRUE} i
If a read is both multi-mapping and multi-overlapping, then each overlapping meta-feature/feature will receive a fractional count of $1/(x*y)$ when `-O', `-M', and `--fraction' are all specified.
Note that each alignment reported for a multi-mapping read is assessed separately for overlapping with multiple meta-features/features.
When multi-mapping reads are reported with primary and secondary alignments and both `-M' and `--primary' are specified, only primary alignments will be considered in counting and secondary alignments will be ignored.
If `-M' is specified but `--primary' is not specified, both primary and secondary alignments will be considered in counting.
Note that all the alignments reported for a multi-mapping read are expected to have a `NH' tag and whether an alignment is primary or secondary is determined by using bit {0x100} in the FLAG field of the alignment record.
\subsection{Read filtering}
\label{sec:read_filtering}
......@@ -917,7 +918,7 @@ Arguments & Description \\
\hline
input\_files \newline (\code{files}) & Give the names of input read files that include the read mapping results. The program automatically detects the file format (SAM or BAM). Multiple files can be provided at the same time. Files are allowed to be provided via $<stdin>$. \\
\hline
-a $<string>$ \newline (\code{annot.ext, annot.inbuilt}) & Give the name of an annotation file. \\
-a $<string>$ \newline (\code{annot.ext, annot.inbuilt}) & Provide name of an annotation file. See \code{-F} option for file format. Gzipped file is accepted.\\
\hline
-A \newline (\code{chrAliases}) & Provide a chromosome name alias file to match chr names in annotation with those in the reads. This should be a two-column comma-delimited text file. Its first column should include chr names in the annotation and its second column should include chr names in the reads. Chr names are case sensitive. No column header should be included in the file.\\
\hline
......@@ -976,7 +977,7 @@ $--$fraction \newline (\code{fraction}) & Assign fractional counts to features.
\hline
$--$fracOverlap $<float>$ \newline (\code{fracOverlap}) & Minimum fraction of overlapping bases in a read that is required for read assignment. Value should be a float number in the range [0,1]. 0 by default. If paired end, number of overlapping bases is counted from both reads. Soft-clipped bases are counted when calculating total read length (but ignored when counting overlapping bases). Both this option and `--minOverlap' option need to be satisfied for read assignment. \\
\hline
$--$fracOverlapFeature $<float>$ \newline (\code{fracOverlapFeature}) & Minimum fraction of bases included in a feature that is required for overlapping with a read or a read pair. Value should be within range [0,1]. 0 by default. \\
$--$fracOverlapFeature $<float>$ \newline (\code{fracOverlapFeature}) & Minimum fraction of bases included in a feature that is required to overlap with a read or a read pair. Value should be within range [0,1]. 0 by default. \\
\hline
$--$ignoreDup \newline (\code{ignoreDup}) & If specified, reads that were marked as duplicates will be ignored. Bit Ox400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read.\\
\hline
......@@ -986,6 +987,10 @@ $--$maxMOp $<int>$ \newline (\code{maxMOp}) & Specify the maximum number of `M'
\hline
$--$minOverlap $<int>$ \newline (\code{minOverlap}) & Minimum number of overlapping bases in a read that is required for read assignment. 1 by default. If a negative value is provided, then a gap of up to specified size will be allowed between read and the feature that the read is assigned to. For assignment of read pairs (fragments), number of overlapping bases from each read from the same pair will be summed. \\
\hline
$--$nonOverlap $<int>$ \newline (\code{nonOverlap}) & Maximum number of non-overlapping bases in a read (or a read pair) that is allowed when being assigned to a feature. No limit is set by default. \\
\hline
$--$nonOverlapFeature $<int>$ \newline (\code{nonOverlapFeature}) & Maximum number of non-overlapping bases in a feature that is allowed in read assignment. No limit is set by default. \\
\hline
$--$primary \newline (\code{primaryOnly}) & If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter they are from multi-mapping reads or not (ie. `-M' is ignored).\\
\hline
$--$read2pos $<int>$ \newline (\code{read2pos}) & The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base position to which the read is reduced. By default, no read reduction will be performed.\\
......@@ -1145,7 +1150,7 @@ Arguments included in parenthesis are the equivalent parameters used by \code{ex
\hline
Arguments & Description \\
\hline
-a $<file>$ \newline (SNPAnnotationFile) & Specify name of a VCF-format file that includes annotated SNPs. Such annotation files can be downloaded from public databases such as the dbSNP database. Incorporating known SNPs into SNP calling has been found to be helpful. However note that the annotated SNPs may or may not be called for the sample being analyzed. \\
-a $<file>$ \newline (SNPAnnotationFile) & Specify name of a VCF-format file that includes annotated SNPs. Such annotation files can be downloaded from public databases such as the dbSNP database. Gzipped file is accepted. Incorporating known SNPs into SNP calling has been found to be helpful. However note that the annotated SNPs may or may not be called for the sample being analyzed. \\
\hline
-b \newline (isBAM) & Indicate the input file provided via $-i$ is in BAM format. \\
\hline
......@@ -1171,7 +1176,7 @@ Arguments & Description \\
\hline
-v & Output version of the program. \\
\hline
-x $<int>$ \newline (maxReads) & Specify the maximum number of mapped reads a SNP-containing location could have. 3000 by default. Any location having more than the threshold number of reads will not be considered for SNP calling. This option is useful for removing PCR artefacts. \\
-x $<int>$ \newline (maxReads) & Specify the maximum depth a SNP location is allowed to have. 1,000,000 reads by default. Any location having more reads than the maximum allowed depth will not be considered for SNP calling. This option is useful for removing PCR artefacts. \\
\hline
\end{longtable}
......
......@@ -47,6 +47,7 @@
#include "subread.h"
#include "input-files.h"
#include "seek-zlib.h"
#include "gene-algorithms.h"
#include "HelperFunctions.h"
......@@ -351,7 +352,7 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
int cigar_cursor=0;
unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
unsigned int chromosome_cursor=first_pos;
int ret=0;
int ret=0, is_first_S = 1;
for(cigar_cursor=0; ; cigar_cursor++)
{
......@@ -363,8 +364,10 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
}
else
{
if(ch == 'S')
if(ch == 'S'){
if(is_first_S) current_section_start_read_pos = tmp_int;
read_cursor += tmp_int;
}
else if(ch == 'M' || ch == 'X' || ch == '=') {
read_cursor += tmp_int;
current_section_chro_len += tmp_int;
......@@ -391,6 +394,7 @@ int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char
}
//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, current_section_chro_len);
tmp_int = 0;
is_first_S = 0;
}
if(cigar_cursor>100) return -1;
}
......@@ -880,10 +884,10 @@ int rand_str(char * str_buff){
}
int mathrand_str(char * str_buff){
srand((int)(miltime()*100));
myrand_srand((int)(miltime()*100));
int x1;
for(x1 = 0; x1 < 6; x1++){
sprintf(str_buff+2*x1, "%02X", rand() & 0xff );
sprintf(str_buff+2*x1, "%02X", myrand_rand() & 0xff );
}
return 0;
}
......@@ -986,9 +990,10 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
void * context, int do_add_feature(char * gene_name, char * transcript_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) ){
char * file_line = malloc(MAX_LINE_LENGTH+1);
int lineno = 0, is_GFF_txid_warned = 0, is_GFF_geneid_warned = 0, loaded_features = 0;
FILE * fp = fopen(file_name, "r");
autozip_fp afp;
int aret = autozip_open(file_name, &afp);
if(NULL == fp){
if(aret < 0){
SUBREADprintf("Error: unable to open the annotation file : %s\n", file_name);
return -1;
}
......@@ -1000,8 +1005,8 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
feature_name = feature_name_tmp;
unsigned int start = 0, end = 0;
char * getres = fgets(file_line, MAX_LINE_LENGTH, fp);
if(getres == NULL) break;
aret = autozip_gets(&afp, file_line, MAX_LINE_LENGTH);
if(aret < 1) break;
lineno++;
if(is_comment_line(file_line, file_type, lineno-1))continue;
......@@ -1121,7 +1126,7 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
}
}
fclose(fp);
autozip_close(&afp);
free(file_line);
return loaded_features;
}
......
......@@ -16,7 +16,8 @@ ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount
mkdir -p ../bin/utilities
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities
@echo
@echo "###########################################################"
......@@ -28,10 +29,10 @@ all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D FREEBSD " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D FREEBSD " > longread-one/make.version
rm -f longread-one/*.o
cd longread-one && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -14,10 +14,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -28,10 +29,13 @@ all: sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo " " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
echo " " > longread-one/make.version
rm -f longread-one/*.o
cd longread-one && $(MAKE)
detectionCall: detection-calls.c ${ALL_OBJECTS}
${CC} -o detectionCall detection-calls.c ${ALL_OBJECTS} ${LDFLAGS}
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -13,7 +13,8 @@ ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # globalReassembly testZlib
mkdir -p ../bin/utilities
mv sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped ../bin/utilities
@echo
@echo "###########################################################"
......@@ -25,10 +26,10 @@ all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel
@echo "###########################################################"
@echo
sublong: longread-mapping/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D MACOS " > longread-mapping/make.version
rm -f longread-mapping/*.o
cd longread-mapping && $(MAKE)
sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
echo "MACOS= -D MACOS " > longread-one/make.version
rm -f longread-one/*.o
cd longread-one && $(MAKE)
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -96,6 +96,7 @@ struct SNP_Calling_Parameters{
unsigned int real_read_count;
unsigned int reported_SNPs;
unsigned int reported_indels;
int supporting_read_excessed_reported;
};
#define PRECALCULATE_FACTORIAL 2000000
......@@ -212,17 +213,16 @@ int is_snp_bitmap(char * bm, unsigned int pos)
return bm [bytes] & (1 << bits);
}
void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, struct SNP_Calling_Parameters * parameters)
void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, struct SNP_Calling_Parameters * parameters, char * chro, int block_start )
{
int bucket;
KeyValuePair * cursor;
for(bucket=0; bucket< merge_table -> numOfBuckets; bucket++)
{
for(bucket=0; bucket< merge_table -> numOfBuckets; bucket++) {
cursor = merge_table -> bucketArray[bucket];
while (1)
{
while (1) {
if (!cursor) break;
int base_N_qual = cursor->value - NULL;
int POI_pos = cursor->key - NULL - 100;
......@@ -232,20 +232,20 @@ void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, s
int j;
unsigned int supporting_bases = 0;
for(j=0; j<4; j++)
{
for(j=0; j<4; j++) {
supporting_bases += snp_voting_piles[POI_pos*4+j];
}
if(supporting_bases < parameters->max_supporting_read_number)
{
if(qual1 < (parameters -> is_phred_64?'@':'!')+parameters->min_phred_score) // low quality bases
{
if(supporting_bases < parameters->max_supporting_read_number) {
if(qual1 < (parameters -> is_phred_64?'@':'!')+parameters->min_phred_score){ // low quality bases
// the low-seq-quality bases are not inclued in voting.
}
else
{
snp_voting_piles[POI_pos*4+base1]+=1;
} else snp_voting_piles[POI_pos*4+base1]+=1;
}else{
if(parameters -> supporting_read_excessed_reported < 100){
parameters -> supporting_read_excessed_reported++;
SUBREADprintf("Warning: the depth exceeded the limit of %d at %s : %d\n", parameters->max_supporting_read_number, chro , block_start + POI_pos);
if(parameters -> supporting_read_excessed_reported == 100)
SUBREADprintf("Too many warnings.\nNo more warning messages are reported.\n");
}
}
......@@ -254,7 +254,7 @@ void put_hash_to_pile(HashTable * merge_table, unsigned int* snp_voting_piles, s
}
}
int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, char ** SNP_bitmap_recorder, unsigned int * snp_voting_piles, int block_no, unsigned int reference_len, char * referenced_genome)
int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, char ** SNP_bitmap_recorder, unsigned int * snp_voting_piles, int block_no, unsigned int reference_len, char * referenced_genome, char * chro_name, unsigned int offset)
{
int last_read_id=-1,i;
HashTable * merge_table = HashTableCreate(1000);
......@@ -314,7 +314,7 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
if(parameters->is_paired_end_data){
if( (last_read_id >>1) != (read_rec.read_number>>1) && last_read_id>=0 && merge_table -> numOfElements > 0)
{
put_hash_to_pile(merge_table, snp_voting_piles, parameters);
put_hash_to_pile(merge_table, snp_voting_piles, parameters,chro_name, offset);
HashTableDestroy(merge_table);
merge_table = HashTableCreate(1000);
}
......@@ -403,6 +403,13 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
{
snp_voting_piles[(first_base_pos+i-1)*4+base_int]+=1;
}
}else{
if(parameters -> supporting_read_excessed_reported < 100){
parameters -> supporting_read_excessed_reported++;
SUBREADprintf("Warning: the depth exceeded the limit of %d at %s : %d\n", parameters->max_supporting_read_number, chro_name , offset + first_base_pos+i);
if(parameters -> supporting_read_excessed_reported == 100)
SUBREADprintf("Too many warnings.\nNo more warning messages are reported.\n");
}
}
}
}
......@@ -411,7 +418,7 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
}
if(parameters->is_paired_end_data && merge_table -> numOfElements > 0)
put_hash_to_pile(merge_table, snp_voting_piles, parameters);
put_hash_to_pile(merge_table, snp_voting_piles, parameters, chro_name, offset);
HashTableDestroy(merge_table);
......@@ -637,7 +644,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
pcutoff_list[i]=-1.;
}
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome);
int read_is_error = read_tmp_block(parameters, tmp_fp,&SNP_bitmap_recorder,snp_voting_piles,block_no, reference_len, referenced_genome, chro_name, offset);
fclose(tmp_fp);
if (parameters -> delete_piles)
......@@ -659,7 +666,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
{
int min_phred_score_raw = parameters -> min_phred_score;
parameters -> min_phred_score -= 3;
read_tmp_block(parameters, tmp_fp,NULL , snp_BGC_piles,block_no, reference_len, referenced_genome);
read_tmp_block(parameters, tmp_fp,NULL , snp_BGC_piles,block_no, reference_len, referenced_genome, chro_name, offset);
parameters -> min_phred_score = min_phred_score_raw;
}
fclose(tmp_fp);
......@@ -1509,9 +1516,10 @@ void print_usage_snp(char * myname)
SUBREADputs("Optional arguments:");
SUBREADputs("");
SUBREADputs(" -a <file> Provide a set of annotated SNPs (e.g. SNPs included in the dbSNP");
SUBREADputs(" database). The supplied file should be in VCF format. Providing");
SUBREADputs(" known SNPs to the program should improve its SNP calling");
SUBREADputs(" performance. Note that the provided SNPs may or may not be called.");
SUBREADputs(" database). The supplied file should be in VCF format (gzipped file");
SUBREADputs(" is accepted). Providing known SNPs to the program should improve");
SUBREADputs(" its SNP calling performance. Note that the provided SNPs may or");
SUBREADputs(" may not be called.");
SUBREADputs("");
SUBREADputs(" -b Indicate the input file provided via -i is in BAM format.");
SUBREADputs("");
......@@ -1529,10 +1537,9 @@ void print_usage_snp(char * myname)
SUBREADputs(" -r <int> Specify the minimum number of mapped reads a SNP-containing");
SUBREADputs(" location must have (ie. the minimum coverage). 1 by default.");
SUBREADputs("");
SUBREADputs(" -x <int> Specify the maximum number of mapped reads a SNP-containing");
SUBREADputs(" location have have. 1000 by default. Any location having more than");
SUBREADputs(" the threshold number of reads will not be considered for SNP");
SUBREADputs(" calling. This option is useful for removing PCR artefacts.");
SUBREADputs(" -x <int> Specify the maximum depth a SNP location is allowed to have.");
SUBREADputs(" 1,000,000 reads by default. This option is useful for removing PCR");
SUBREADputs(" artefacts.");
SUBREADputs("");
SUBREADputs(" -s <int> Specify the minimum base quality scores (Phred scores) read bases");
SUBREADputs(" must satisfy to be used for SNP calling. 13 by default. Read bases");
......@@ -1604,7 +1611,7 @@ int main_snp_calling_test(int argc,char ** argv)
parameters.supporting_read_rate = 0.;
parameters.min_supporting_read_number = 1;
parameters.min_alternative_read_number = 1;
parameters.max_supporting_read_number = 1000;
parameters.max_supporting_read_number = 1000000;
parameters.neighbour_filter_testlen = -1;
parameters.neighbour_filter_rate = 0.000000001;
parameters.min_phred_score = 13;
......
......@@ -326,7 +326,7 @@ int main(int argc, char ** argv)
struct timeval xtime;
gettimeofday(&xtime,NULL);
srand(time(NULL)^xtime.tv_usec);
myrand_srand(time(NULL)^xtime.tv_usec);
......
......@@ -366,7 +366,7 @@ int transfer_SAM_to_position_table(char * sam_file)
int is_paired_end_reads = 0;
unsigned long long int file_offset = ftello(input_file.input_fp);
if(feof(input_file.input_fp))break;
if(feof((FILE *)input_file.input_fp))break;
read_text[0]=0;
qual_text[0]=0;
......
......@@ -406,7 +406,7 @@ void remove_sorted_neighbours(global_context_t * global_context)
merge_sort(sort_data, indel_context->total_events, event_neighbour_sort_compare, event_neighbour_sort_exchange, event_neighbour_sort_merge);
int xk2, position_delta, maxinum_removed_events = global_context-> config.do_fusion_detection? 9999999:1999999;
int xk2, position_delta, maxinum_removed_events =(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)? 9999999:1999999;
position_delta = 10;
int * to_be_removed_ids = malloc(sizeof(int) * (1+maxinum_removed_events)), to_be_removed_number=0;
......@@ -477,7 +477,7 @@ void remove_neighbour(global_context_t * global_context)
int xk1;
int * to_be_removed_ids;
int to_be_removed_number = 0, all_junctions = 0;
int maxinum_removed_events = global_context-> config.do_fusion_detection? 9999999:1999999;
int maxinum_removed_events =(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)? 9999999:1999999;
to_be_removed_ids = malloc(sizeof(int) * (1+maxinum_removed_events));
......@@ -564,7 +564,7 @@ void remove_neighbour(global_context_t * global_context)
}
}
if(1 && global_context->config.do_fusion_detection && event_body -> event_type == CHRO_EVENT_TYPE_FUSION)
if((1 && global_context-> config.do_fusion_detection || 1 && global_context-> config.do_long_del_detection) && event_body -> event_type == CHRO_EVENT_TYPE_FUSION)
{
for(xk2=-10 ; xk2 < 10 ; xk2++)
{
......@@ -1913,9 +1913,11 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
dyna_steps = core_dynamic_align(global_context, thread_context, read_text + last_correct_base, first_correct_base - last_correct_base, voting_position + last_correct_base + last_indel, movement_buffer, indels, read_name);
movement_buffer[dyna_steps]=0;
if(0 && FIXLENstrcmp("simulated.2467286", read_name) == 0){
SUBREADprintf("DYNAMIC: indels=%d, %s\n", indels, movement_buffer);
}
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0) {
......@@ -1943,7 +1945,6 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
int mv=movement_buffer[x1];
if(mv==3) total_mismatch++;
}
// if(pair_number==4)printf("XK3==%d\n", total_mismatch);
if(total_mismatch<=2 || (global_context->config.maximise_sensitivity_indel && total_mismatch <= 2 ))
for(x1=0; x1<dyna_steps;x1++)
......@@ -1960,24 +1961,9 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
}
else if ( is_in_indel && (mv == 0 || mv == 3) )
{
//if(pair_number == 17296)
//printf("R%d : NEW INDEL: %u; len = %d\n", is_second_read, indel_left_boundary - 1 , current_indel_len);
//#ifdef indel_debug
//#warning "=========== COMMENT THIS LINE ==============="
//printf("NEW INDEL: %u; len = %d\n", indel_left_boundary - 1 , current_indel_len);
//#endif
// let's test if it is ambiguous:
gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541",read_name ) == 0){
SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%ul READ_CUR=%d\n",i, current_indel_len, pair_number, indel_left_boundary-1, cursor_on_read);
}
int ambiguous_count=0;
//#warning " >>>>>>>> MAKE SURE DISABLING THE NEXT BLOCK IS HARMLESS <<<<<<<<< "
if(0){
int ambiguous_i, best_matched_bases = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) +
match_chro(read_text + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type);
......@@ -1993,6 +1979,9 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
int old_event_id = -1;
if(0)if(indel_left_boundary >= 44438630 + 1210 - 200 && indel_left_boundary <= 44438630 + 1210 + 200 && current_indel_len == 6){
SUBREADprintf("ADD AN INDEL: %s : %u ; len = %u\n", read_name, indel_left_boundary, current_indel_len);
}
chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, read_text + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0, &old_event_id);
mark_gapped_read(current_result);
......@@ -2285,7 +2274,7 @@ int write_indel_final_results(global_context_t * global_context)
FILE * ofp = NULL;
fn2 = malloc(MAX_FILE_NAME_LENGTH);
snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.indel", global_context->config.output_prefix);
snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.indel.vcf", global_context->config.output_prefix);
ofp = f_subr_open(fn2, "wb");
......@@ -4418,8 +4407,6 @@ int finalise_long_insertions(global_context_t * global_context)
void init_global_context(global_context_t * context)
{
srand(time(NULL));
memset(context->module_contexts, 0, 5*sizeof(void *));
memset(&context->config, 0, sizeof(configuration_t));
......@@ -4428,6 +4415,7 @@ void init_global_context(global_context_t * context)
context->config.report_sam_file = 1;
context->config.do_breakpoint_detection = 0;
context->config.do_fusion_detection = 0;
context->config.do_long_del_detection = 0;
context->config.do_structural_variance_detection = 0;
context->config.more_accurate_fusions = 1;
context->config.report_multi_mapping_reads = 0;
......@@ -4542,7 +4530,7 @@ void init_global_context(global_context_t * context)
int seed_rand[2];
double double_time = miltime();
memcpy(seed_rand, &double_time, 2*sizeof(int));
srand(seed_rand[0]^seed_rand[1]);
myrand_srand(seed_rand[0]^seed_rand[1]);
context->config.max_indel_length = 5;
context->config.phred_score_format = FASTQ_PHRED33;
......
......@@ -48,6 +48,7 @@ static struct option long_options[] =
{"SAMinput", no_argument, 0, 0},
{"reportPairedMultiBest", no_argument, 0, 0},
{"sv", no_argument, 0, 0},
{"longDel", no_argument, 0, 0},
{"extraColumns", no_argument, 0, 0},
{"forcedPE", no_argument, 0, 0},
{"ignoreUnmapped", no_argument, 0, 0},
......@@ -196,8 +197,9 @@ void print_usage_core_aligner()
SUBREADputs("");
SUBREADputs("# gene annotation");
SUBREADputs("");
SUBREADputs(" -a Name of an annotation file. GTF/GFF format by default. See");
SUBREADputs(" -F option for more format information.");
SUBREADputs(" -a Name of an annotation file (gzipped file is accepted).");
SUBREADputs(" GTF/GFF format by default. See -F option for more format");
SUBREADputs(" information.");
SUBREADputs("");
SUBREADputs(" -F Specify format of the provided annotation file. Acceptable");
SUBREADputs(" formats include 'GTF' (or compatible GFF format) and");
......@@ -484,6 +486,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("BAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 0){
SUBREADprintf("ERROR: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 1;
global_context->config.is_SAM_file_input = 1;
}
......@@ -493,6 +500,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("SAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 1){
SUBREADprintf("ERROR: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 0;
global_context->config.is_SAM_file_input = 1;
}
......@@ -515,6 +527,12 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.prefer_donor_receptor_junctions = 0;
//global_context->config.do_big_margin_filtering_for_reads = 1;
}
else if(strcmp("longDel", long_options[option_index].name)==0)
{
global_context->config.do_breakpoint_detection = 1;
global_context->config.do_long_del_detection = 1;
global_context->config.prefer_donor_receptor_junctions = 0;
}
else if(strcmp("minMappedLength", long_options[option_index].name)==0)
{
if(!is_valid_digit(optarg, "minMappedLength"))
......@@ -553,7 +571,7 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.maximise_sensitivity_indel = 1;
global_context->config.realignment_minimum_variant_distance = 1;
global_context->config.max_indel_length = 16;
// global_context->config.max_indel_length = max(16, global_context->config.max_indel_length);
}
else if(strcmp("minVoteCutoff", long_options[option_index].name)==0)
{
......@@ -577,7 +595,7 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
if(global_context->config.reported_multi_best_reads > 1 && ! global_context->config.report_multi_mapping_reads)
SUBREADprintf("WARNING: You required multi best alignments, but disallowed multi-mapping reads. You need to turn on the multi-mapping option.\n");
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && (global_context->config.do_fusion_detection || global_context->config.do_long_del_detection);
if(global_context->config.more_accurate_fusions)
{
global_context->config.high_quality_base_threshold = 999999;
......
......@@ -199,8 +199,9 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs("# gene annotation");
SUBREADputs("");
SUBREADputs(" -a Name of an annotation file. GTF/GFF format by default. See");
SUBREADputs(" -F option for more format information.");
SUBREADputs(" -a Name of an annotation file (gzipped file is accepted).");
SUBREADputs(" GTF/GFF format by default. See -F option for more format");
SUBREADputs(" information.");
SUBREADputs("");
SUBREADputs(" -F Specify format of the provided annotation file. Acceptable");
SUBREADputs(" formats include 'GTF' (or compatible GFF format) and");
......@@ -498,11 +499,19 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
}
else if(strcmp("BAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 0){
SUBREADprintf("Error: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 1;
global_context->config.is_SAM_file_input = 1;
}
else if(strcmp("SAMinput", long_options[option_index].name)==0)
{
if(global_context->config.is_SAM_file_input && global_context->config.is_BAM_input == 1){
SUBREADprintf("Error: you cannot specify both SAMinput and BAMinput\n");
return -1;
}
global_context->config.is_BAM_input = 0;
global_context->config.is_SAM_file_input = 1;
}
......@@ -583,7 +592,7 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.maximise_sensitivity_indel = 1;
global_context->config.realignment_minimum_variant_distance = 1;
global_context->config.max_indel_length = 16;
// global_context->config.max_indel_length = 16;
}
else if(strcmp("disableBigMargin", long_options[option_index].name)==0)
{
......
This diff is collapsed.
......@@ -159,4 +159,5 @@ void finalise_structural_variances(global_context_t * global_context);
void debug_show_event(global_context_t* global_context, chromosome_event_t * event);
void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, int * small_pos, unsigned int * small_abs, char ** large_chro, int * large_pos, unsigned int * large_abs);
int get_offset_maximum_chro_pos(global_context_t * global_context, thread_context_t * thread_context, unsigned int linear);
#endif
......@@ -381,9 +381,9 @@ int show_summary(global_context_t * global_context)
if(global_context->config.output_prefix[0])
{
if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC && ( global_context -> config.prefer_donor_receptor_junctions || !global_context -> config.do_fusion_detection))
if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC && ( global_context -> config.prefer_donor_receptor_junctions || !(global_context -> config.do_fusion_detection || global_context -> config.do_long_del_detection)))
print_in_box(80, 0,0," Junctions : %'u", global_context -> all_junctions);
if(global_context->config.do_fusion_detection)
if((global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection))
print_in_box(80, 0,0," Fusions : %'u", global_context -> all_fusions);
print_in_box(80, 0,0," Indels : %'u", global_context -> all_indels);
}
......@@ -648,7 +648,8 @@ int parse_opts_core(int argc , char ** argv, global_context_t * global_context)
global_context->config.minimum_subread_for_second_read = atoi(optarg);
break;
case 't':
sprintf(global_context->config.temp_file_prefix, "%s/core-temp-sum-%06u-%05u", optarg, getpid(), rand());
sprintf(global_context->config.temp_file_prefix, "%s/core-temp-sum-%06u-%05u", optarg, getpid(), myrand_rand()
);
break;
case 'F':
global_context->config.is_second_iteration_running = 0;
......@@ -719,11 +720,36 @@ int check_configuration(global_context_t * global_context)
}
unsigned long long myrand_seed = 0;
void myrand_srand(unsigned long long seed){
struct timeval xtime;
gettimeofday(&xtime,NULL);
unsigned long long tv = xtime.tv_usec ^ (xtime.tv_sec * 81811);
myrand_seed ^= tv;
tv = xtime.tv_sec ^ (xtime.tv_usec << 23);
myrand_seed ^= tv;
myrand_rand();
myrand_seed ^= seed;
myrand_rand();
}
int myrand_rand(){
if(myrand_seed % 3133LLU == 0) myrand_srand(0);
myrand_seed ^= (myrand_seed % 131317);
myrand_seed ^= myrand_seed << 13;
return (int)(myrand_seed % (1LLU+RAND_MAX));
}
int core_main(int argc , char ** argv, int (parse_opts (int , char **, global_context_t * )))
{
struct timeval xtime;
gettimeofday(&xtime,NULL);
srand(time(NULL)^xtime.tv_usec);
myrand_srand(time(NULL)^xtime.tv_usec);
global_context_t * global_context;
global_context = (global_context_t*)malloc(sizeof(global_context_t));
......@@ -895,7 +921,7 @@ int convert_GZ_to_FQ(global_context_t * global_context, char * fname, int half_n
int is_OK = 0;
char temp_file_name[200];
char * linebuff=malloc(3001);
gzFile * rawfp = gzopen(fname, "r");
gzFile rawfp = gzopen(fname, "r");
if(rawfp)
{
......@@ -1039,17 +1065,17 @@ int fetch_next_read_pair(global_context_t * global_context, thread_context_t * t
int write_final_results(global_context_t * context)
{
if(context -> config.do_fusion_detection && context -> config.do_structural_variance_detection)
if((context -> config.do_fusion_detection || context -> config.do_long_del_detection) && context -> config.do_structural_variance_detection)
finalise_structural_variances(context);
if(context -> config.output_prefix[0])
{
write_indel_final_results(context);
if(context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && (context -> config.prefer_donor_receptor_junctions||!context -> config.do_fusion_detection))
if(context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && (context -> config.prefer_donor_receptor_junctions||!(context -> config.do_fusion_detection || context -> config.do_long_del_detection)))
write_junction_final_results(context);
if(context -> config.do_fusion_detection)
if((context -> config.do_fusion_detection || context -> config.do_long_del_detection))
write_fusion_final_results(context);
}
......@@ -1213,8 +1239,8 @@ int add_head_tail_cut_softclipping(global_context_t * global_context, unsigned i
if('M' == nch || 'D' == nch || 'S' == nch || 'N' == nch){
int is_start_in_chro, is_end_in_chro;
is_start_in_chro = get_offset_maximum_chro_pos(global_context,linear_cursor);
is_end_in_chro = get_offset_maximum_chro_pos(global_context,linear_cursor + tmpi);
is_start_in_chro = get_offset_maximum_chro_pos(global_context,NULL,linear_cursor);
is_end_in_chro = get_offset_maximum_chro_pos(global_context,NULL,linear_cursor + tmpi);
linear_cursor += tmpi;
}
......@@ -1267,7 +1293,7 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
int current_strand = (current_result -> mapping_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
int is_first_section_jumped = current_result -> first_base_is_jumpped;
if(is_first_section_jumped) assert(global_context -> config.do_fusion_detection);
if(is_first_section_jumped) assert((global_context -> config.do_fusion_detection || global_context -> config.do_long_del_detection));
strcpy(r-> current_cigar_decompress, current_result -> cigar_string);
int chimeric_sections = 0;
......@@ -1290,7 +1316,7 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
//printf("SM='%s'\n", r->additional_information);
if(global_context -> config.do_fusion_detection)
if((global_context -> config.do_fusion_detection || global_context -> config.do_long_del_detection))
{
chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens, read_name);
......@@ -1331,7 +1357,7 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
{
int head_cut = 0 , tail_cut = 0;
if(locate_gene_position_max(r->linear_position + r->soft_clipping_movements,& global_context -> chromosome_table, &r-> chro , &r -> offset, &head_cut, &tail_cut, global_context->config.do_fusion_detection?read_len:(current_result->chromosomal_length - r->soft_clipping_movements))) {
if(locate_gene_position_max(r->linear_position + r->soft_clipping_movements,& global_context -> chromosome_table, &r-> chro , &r -> offset, &head_cut, &tail_cut,(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)?read_len:(current_result->chromosomal_length - r->soft_clipping_movements))){
is_r_OK = 0;
} else {
int is_added_OK = 1;
......@@ -1906,7 +1932,7 @@ void write_single_fragment(global_context_t * global_context, thread_context_t *
//#warning "SUBREAD_151 ============= REMOVE '+1' when not doing structure variance detection for the proposal =========="
if(0)
if(global_context -> config.do_fusion_detection && rec1 && rec2 && current_location == 0 && rec1 -> chimeric_sections == 1 && rec2 -> chimeric_sections == 1){
if((global_context -> config.do_fusion_detection || global_context -> config.do_long_del_detection) && rec1 && rec2 && current_location == 0 && rec1 -> chimeric_sections == 1 && rec2 -> chimeric_sections == 1){
// when current_location == 0, both r1 and r2 were on the same strand.
int is_funky = is_funky_fragment(global_context, read_name_1, rec1 -> chro, rec1 -> offset, read_len_1, rec1 -> strand, rec1 -> out_cigars[0], read_text_1, read_name_2, rec2 -> chro, rec2 -> offset, read_len_2, rec2 -> strand, rec2 -> out_cigars[0] , read_text_2, tlen);
if (0 &&is_funky)
......@@ -2278,13 +2304,13 @@ int do_iteration_one(global_context_t * global_context, thread_context_t * threa
char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
int read_len_1, read_len_2=0;
int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection;
int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection || global_context -> config.do_long_del_detection;
int sqr_interval, sqr_read_number = 0;
init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
sqr_interval = max(5000,global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads);
while(1)
if(0) while(1)
{
int is_second_read;
......@@ -2323,6 +2349,7 @@ int do_iteration_one(global_context_t * global_context, thread_context_t * threa
continue;
}
// DISABLED
find_new_indels(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
if(need_junction_step)
find_new_junctions(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
......@@ -3139,6 +3166,20 @@ int core_get_subread_quality(global_context_t * global_context, thread_context_t
return ret;
}
int has_better_mapping(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t current_read_number, int is_second_read, int this_aln_id){
int better_read_id;
mapping_result_t * this_r = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, this_aln_id);
for(better_read_id = 0; better_read_id < this_aln_id; better_read_id++){
mapping_result_t * better_r = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, better_read_id);
if( this_r -> selected_position >= better_r -> selected_position - global_context -> config.max_indel_length - 1
&& this_r -> selected_position <= better_r -> selected_position + global_context -> config.max_indel_length +1 )
if( this_r -> confident_coverage_start >= better_r -> confident_coverage_start
&& this_r -> confident_coverage_end <= better_r -> confident_coverage_end) return 1;
}
return 0;
}
int do_voting(global_context_t * global_context, thread_context_t * thread_context)
{
int ret, xk1;
......@@ -3174,13 +3215,12 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
unsigned int low_index_border = global_context -> current_value_index -> start_base_offset;
unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
int has_second_read = 1 + global_context -> input_reads.is_paired_end_reads;
//int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection;
if(thread_context)
thread_context -> current_value_index = global_context -> current_value_index;
int GENE_SLIDING_STEP = global_context -> current_index -> index_gap;
int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection;
int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection ||global_context -> config.do_long_del_detection;
while(1)
{
......@@ -3206,7 +3246,7 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
for(is_reversed = 0; is_reversed<2; is_reversed++)
{
//printf("MAP_REA = %s / %s\n", read_text_1, read_text_2);
if(is_reversed==0 || !global_context->config.do_fusion_detection)
if(is_reversed==0 || !(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection))
{
init_gene_vote(vote_1);
if(global_context -> input_reads.is_paired_end_reads) init_gene_vote(vote_2);
......@@ -3271,17 +3311,7 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
char * subread_string = current_read + subread_offset;
gehash_key_t subread_integer = genekey2int(subread_string, global_context->config.space_type);
//SUBREADprintf("The %d-th subread=%s\n", subread_no, subread_string);
if(global_context -> config.use_quality_score_break_ties)
{
char * quality_string_subr = current_qual + subread_offset;
subread_quality = core_get_subread_quality(global_context, thread_context, quality_string_subr, 16);
}
//SUBREADprintf("%d=%u %s\n", subread_offset, subread_integer, subread_string);
if(global_context->config.is_methylation_reads)
gehash_go_q_CtoT(global_context->current_index, subread_integer , subread_offset, current_rlen, is_reversed, current_vote, 1, subread_quality, 0xffffff, voting_max_indel_length, subread_no, 1, low_index_border, high_index_border - current_rlen);
else
......@@ -3321,34 +3351,14 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
if(is_reversed==1 || !global_context->config.do_fusion_detection)
if(is_reversed==1 || !(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection))
{
//SUBREADprintf("P%d %llu %s\n", is_reversed, current_read_number, read_name_1);
//#warning ">>>>>>> DISABLE THE FOLLOING BLOCK <<<<<<"
if(0) { int dx1, vtab_items1=0, vtab_items2=0;
for(dx1 = 0; dx1 < GENE_VOTE_TABLE_SIZE; dx1++) vtab_items1+=vote_1->items[dx1];
for(dx1 = 0; dx1 < GENE_VOTE_TABLE_SIZE; dx1++) vtab_items2+=vote_2->items[dx1];
printf("OCT27-STEPA-%s-P%d, V1 %d, V2 %d\n", read_name_1, is_reversed, vtab_items1, vtab_items2);
}
//#warning ">>>>>>> DISABLE THE FOLLOING BLOCK <<<<<<"
if(0 && FIXLENstrcmp("R010442852", read_name_1) ==0 ) {
if(0)if(1||FIXLENstrcmp("simulated.603514/", read_name_1) ==0 || FIXLENstrcmp("simulated.1613132", read_name_1) ==0 ) {
SUBREADprintf(">>>%llu<<<\n%s [%d] %s\n%s [%d] %s\n", current_read_number, read_name_1, read_len_1, read_text_1, read_name_2, read_len_2, read_text_2);
SUBREADprintf(" ======= PAIR %s = %llu ; NON_INFORMATIVE = %d, %d =======\n", read_name_1, current_read_number, vote_1 -> noninformative_subreads, vote_2 -> noninformative_subreads);
print_votes(vote_1, global_context -> config.index_prefix);
print_votes(vote_2, global_context -> config.index_prefix);
}
//finalise_vote(vote_1);
/*
if(407229 == current_read_number){
fprintf(stderr, "TABLE_ITEMS=%llu\n", global_context->current_index->current_items);
//print_votes(vote_1, global_context -> config.index_prefix);
//print_votes(vote_2, global_context -> config.index_prefix);
}*/
//if(global_context -> input_reads.is_paired_end_reads) finalise_vote(vote_2);
//SUBREADprintf("NON-INFORMARTIVE=%d, %d\n", vote_1 -> noninformative_subreads, vote_2 -> noninformative_subreads);
if(global_context -> input_reads.is_paired_end_reads)
process_voting_junction(global_context, thread_context, current_read_number, vote_1, vote_2, read_name_1, read_name_2, read_text_1, read_text_2, read_len_1, read_len_2, is_reversed, v1_all_subreads, v2_all_subreads);
......@@ -3365,10 +3375,6 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote, vote_1 -> noninformative_subreads);
}
}
//if(current_read_number == 0) print_votes(vote_1, global_context -> config.index_prefix);
//if(current_read_number == 0) printf("V0=%d; %d\n", _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->selected_votes, _global_retrieve_alignment_ptr(global_context, current_read_number, 1,0)->selected_votes);
}
if(is_reversed == 0)
......@@ -3427,9 +3433,15 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
locate_current_value_index(global_context, thread_context, current_r, current_rlen);
//#warning "==== UNCOMMENT THE NEXT THREE LINES ===="
if(0 && FIXLENstrcmp("simulated.2467286/2", current_read_name)==0) SUBREADprintf("TEST_BETTER: better_id = %d, votes = %d, has=%d\n", best_read_id, current_r -> selected_votes, has_better_mapping(global_context, thread_context, current_read_number, is_second_read,best_read_id));
if(!has_better_mapping(global_context, thread_context, current_read_number, is_second_read,best_read_id))
find_new_indels(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
if(need_junction_step)
find_new_junctions(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
if(0 && FIXLENstrcmp("simulated.2467286/2", current_read_name)==0) SUBREADprintf("TEST_BETTER_END: better_id = %d, votes = %d, has=%d\n", best_read_id, current_r -> selected_votes, has_better_mapping(global_context, thread_context, current_read_number, is_second_read,best_read_id));
if(thread_context) thread_context -> current_value_index = curr_val_index;
else global_context -> current_value_index = curr_val_index;
}
......@@ -3537,7 +3549,7 @@ int run_maybe_threads(global_context_t *global_context, int task)
{
thread_contexts[current_thread_no].thread_id = current_thread_no;
init_indel_thread_contexts(global_context, thread_contexts+current_thread_no, task);
if(global_context->config.do_breakpoint_detection || global_context->config.do_fusion_detection)
if((global_context->config.do_breakpoint_detection || global_context-> config.do_fusion_detection || global_context->config.do_breakpoint_detection || global_context-> config.do_long_del_detection))
init_junction_thread_contexts(global_context, thread_contexts+current_thread_no, task);
subread_lock_occupy(&global_context -> thread_initial_lock);
......@@ -3816,9 +3828,9 @@ int print_configuration(global_context_t * context)
if(context->config.do_breakpoint_detection)
{
if(context-> config.do_fusion_detection)
{
print_in_box(80, 0, 0, "Function : Read alignment + Junction/Fusion detection%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
}
else if(context-> config.do_long_del_detection)
print_in_box(80, 0, 0, "Function : Read alignment + Long Deletion detection%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
else
print_in_box(80, 0, 0, "Function : Read alignment + Junction detection (%s)", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?"DNA-Seq":"RNA-Seq");
}
......@@ -4254,7 +4266,7 @@ int init_modules(global_context_t * context)
{
sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "init_modules: begin");
int ret = init_indel_tables(context);
if(context->config.do_breakpoint_detection || context->config.do_fusion_detection)
if((context->config.do_breakpoint_detection || context-> config.do_fusion_detection || context->config.do_breakpoint_detection || context-> config.do_long_del_detection))
ret = ret || init_junction_tables(context);
sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "init_modules: finished: %d",ret);
......@@ -4264,7 +4276,7 @@ int init_modules(global_context_t * context)
int destroy_modules(global_context_t * context)
{
destroy_indel_module(context);
if(context->config.do_breakpoint_detection || context->config.do_fusion_detection)
if((context->config.do_breakpoint_detection || context-> config.do_fusion_detection || context->config.do_breakpoint_detection || context-> config.do_long_del_detection))
destroy_junction_tables(context);
return 0;
}
......