Imported Upstream version 1.5.1+dfsg

parent 5c4e244d
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -35,9 +35,9 @@
\begin{center}
{\Huge\bf Subread/Rsubread Users Guide}\\
\vspace{1 cm}
{\centering\large Subread v1.5.0-p3/Rsubread v1.22.1\\}
{\centering\large Subread v1.5.1/Rsubread v1.23.4\\}
\vspace{1 cm}
\centering 27 May 2016\\
\centering 25 August 2016\\
\vspace{5 cm}
\Large Wei Shi and Yang Liao\\
\vspace{1 cm}
......@@ -306,19 +306,19 @@ An index must be built for the reference first and then the read mapping can be
{\noindent\bf Step 2: Align reads}\\
\noindent Map single-end reads from a gzipped file using 5 threads and save mapping results to a BAM file:\\
\code{subread-align -T 5 -i my\_index -r reads.txt.gz -o subread\_results.bam}\\
\code{subread-align -t 1 -T 5 -i my\_index -r reads.txt.gz -o subread\_results.bam}\\
\noindent Detect indels of up to 16bp:\\
\code{subread-align -I 16 -i my\_index -r reads.txt -o subread\_results.sam}\\
\code{subread-align -t 1 -I 16 -i my\_index -r reads.txt -o subread\_results.sam}\\
\noindent Report up to three best mapping locations:\\
\code{subread-align -B 3 -i my\_index -r reads.txt -o subread\_results.sam}\\
\code{subread-align -t 1 -B 3 -i my\_index -r reads.txt -o subread\_results.sam}\\
\noindent Report uniquely mapped reads only:\\
\code{subread-align -u -i my\_index -r reads.txt -o subread\_results.sam}\\
\code{subread-align -t 1 -u -i my\_index -r reads.txt -o subread\_results.sam}\\
\noindent Map paired-end reads:\\
\code{subread-align -d 50 -D 600 -i my\_index -r reads1.txt -R reads2.txt \newline -o subread\_results.sam}\\
\code{subread-align -t 1 -d 50 -D 600 -i my\_index -r reads1.txt -R reads2.txt \newline -o subread\_results.sam}\\
\section{A quick start for using Bioconductor {\Rsubread} package}
......@@ -338,22 +338,22 @@ buildindex(basename="my_index",reference="genome.fa")
\noindent Map single-end reads using 5 threads:
\begin{Rcode}
align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",nthreads=5)
align(index="my_index",readfile1="reads.txt.gz",type="dna",output_file="rsubread.bam",nthreads=5)
\end{Rcode}
\noindent Detect indels of up to 16bp:
\begin{Rcode}
align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",indels=16)
align(index="my_index",readfile1="reads.txt.gz",type="dna",output_file="rsubread.bam",indels=16)
\end{Rcode}
\noindent Report up to three best mapping locations:
\begin{Rcode}
align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",nBestLocations=3)
align(index="my_index",readfile1="reads.txt.gz",type="dna",output_file="rsubread.bam",nBestLocations=3)
\end{Rcode}
\noindent Map paired-end reads:
\begin{Rcode}
align(index="my_index",readfile1="reads1.txt.gz",readfile2="reads2.txt.gz",
align(index="my_index",readfile1="reads1.txt.gz",readfile2="reads2.txt.gz",type="dna",
output_file="rsubread.bam",minFragLength=50,maxFragLength=600)
\end{Rcode}
......@@ -370,7 +370,7 @@ Table 1 describes the arguments used by the \code{subread-buildindex} program.
\newpage
\begin{table}[h]
\raggedright{Table 1: Arguments used by the \code{subread-buildindex} program (\code{buildindex} function in \Rsubread).
\raggedright{Table 1: Arguments used by the \code{subread-buildindex} program (\code{buildindex} function in \Rsubread) in alphabetical order.
Arguments in parenthesis in the first column are used by \code{buildindex}.\newline\\}
\begin{tabular}{|p{4cm}|p{12cm}|}
\hline
......@@ -407,7 +407,7 @@ Arguments used in Bioconductor \code{Rsubread} package are included in parenthes
\begin{longtable}{|p{4cm}|p{12cm}|}
\multicolumn{2}{p{16cm}}{Table 2: Arguments used by the \code{subread-align}/\code{subjunc} programs included in the SourceForge {\Subread} package.
\multicolumn{2}{p{16cm}}{Table 2: Arguments used by the \code{subread-align}/\code{subjunc} programs included in the SourceForge {\Subread} package in alphabetical order.
Arguments in parenthesis in the first column are the equivalent arguments used in Bioconductor {\Rsubread} package.
Arguments used by \code{subread-align} only are marked with $^*$.
Arguments used by \code{subjunc} only are marked with $^{**}$.
......@@ -447,7 +447,7 @@ Arguments & Description \\
\hline
-S $<ff:fr:rf>$ \newline (\code{PE\_orientation}) & Specify the orientation of the two reads from the same pair. It has three possible values including `fr', `ff' and `'rf. Letter `f' denotes the forward strand and letter `r' the reverse strand. `fr' by default (ie. the first read in the pair is on the forward strand and the second read on the reverse strand).\\
\hline
$^*$ -t $<int>$ \newline (\code{type}) & Specify the type of input sequencing data. Possible values include \code{0}, denoting RNA-seq data, or \code{1}, denoting genomic DNA-seq data. Character values including `rna' and `dna' can also be used in the {\R} function. For genomic DNA-seq data, the aligner takes into account both the number of matched bases and the number of mis-matched bases to determine the the best mapping location after applying the `seed-and-vote' approach for read mapping. For RNA-seq data, only the number of mis-matched bases is considered for determining the best mapping location. \\
$^*$ -t $<int>$ \newline (\code{type}) & Specify the type of input sequencing data. Possible values include \code{0}, denoting RNA-seq data, or \code{1}, denoting genomic DNA-seq data. User must specify the value. Character values including `rna' and `dna' can also be used in the {\R} function. For genomic DNA-seq data, the aligner takes into account both the number of matched bases and the number of mis-matched bases to determine the the best mapping location after applying the `seed-and-vote' approach for read mapping. For RNA-seq data, only the number of mis-matched bases is considered for determining the best mapping location. \\
\hline
-T $<int>$ \newline (\code{nthreads}) & Specify the number of threads/CPUs used for mapping. The value should be between 1 and 32. 1 by default.\\
\hline
......@@ -489,23 +489,36 @@ $--$trim3 $<int>$ \newline (\code{nTrim3}) & Trim off $<int>$ number of bases fr
\section{Mapping quality scores}
Both {\Subread} and {\Subjunc} aligners output a mapping quality score (MQS) for each mapped read,
computed as
{\Subread} and {\Subjunc} aligners assign a mapping quality score (MQS) to each mapped read to indicate the confidence of the mapping:\\
\[ MQS = \left\{
\begin{array}{l l}
(\sum_{i \in b_m} ( 1 - p_i) - \sum_{i \in b_{mm}} (1 - p_i)) \times 60 / L & \quad \text{if uniquely mapped}\\
& \quad \text{\scriptsize{[MQS is reset to 0 if less than 0]}}\\
\frac{40}{(N_c + N_{mm})} & \quad \text{if only one best location found}\\
& \\
0 & \quad \text{if mapped to $>1$ best location}\\
\end{array} \right.\]
& \\
0 & \quad \text{if $>1$ equally best locations were found}\\
\end{array} \right.\] \\
\noindent where $N_c$ is the number of candidate locations that are considered for final full alignment (re-alignment step).
Such locations must have a vote number within top three vote numbers counted from all locations considered in the subread-mapping step (first scan).
Up to three candiate locations are considered for each read in the realignment step.
$N_{mm}$ is the number of mismatches found in the alignment at each candidate location.
where $L$ is the read length, $p_i$ is the base-calling $p$-value for the $i$th base in the read, $b_m$ is the set of locations of matched bases, and $b_{mm}$ is the set of locations of mismatched bases.
% \[ MQS = \left\{
% \begin{array}{l l}
% (\sum_{i \in b_m} ( 1 - p_i) - \sum_{i \in b_{mm}} (1 - p_i)) \times 60 / L & \quad \text{if uniquely mapped}\\
% & \quad \text{\scriptsize{[MQS is reset to 0 if less than 0]}}\\
% & \\
% 0 & \quad \text{if $>1$ equally best locations found}\\
% \end{array} \right.\]
% where $L$ is the read length, $p_i$ is the base-calling $p$-value for the $i$th base in the read, $b_m$ is the set of locations of matched bases, and $b_{mm}$ is the set of locations of mismatched bases.
% Base-calling p values can be readily computed from the base quality scores.
% Read bases of high sequencing quality have low base-calling p values.
% Read bases that were found to be insertions are treated as matched bases in the MQS calculation.
% The MQS is a read-length normalized value and it is in the range [0, 60).
Base-calling p values can be readily computed from the base quality scores.
Read bases of high sequencing quality have low base-calling p values.
Read bases that were found to be insertions are treated as matched bases in the MQS calculation.
The MQS is a read-length normalized value and it is in the range [0, 60).
\section{Mapping output}
......@@ -542,7 +555,7 @@ You can provide a list of FASTA files or a single FASTA file including all the r
The commands used by {\Subread} to align RNA-seq reads are the same as those used to align gDNA-seq reads.
Below is an example of using {\Subread} to map single-end RNA-seq reads.\\
\code{subread-align -i my\_index -r rnaseq-reads.txt -o subread\_results.sam}\\
\code{subread-align -t 0 -i my\_index -r rnaseq-reads.txt -o subread\_results.sam}\\
\noindent Another RNA-seq aligner included in this package is the {\Subjunc} aligner.
{\Subjunc} not only performs read alignments but also detects exon-exon junctions.
......@@ -652,10 +665,11 @@ We found that there was a significant decrease in the number of mapped reads whe
We therefore used a threshold of 4 consensus subreads to map this dataset.
However, what we observed might not be the case for other datasets that were generated from different cell types and different species.
Below is an example of mapping 50bp long reads (adaptor sequences were included in the reads in addition to the miRNA sequences), with at least 4 consensus subreads required in the mapping:
Below is an example of mapping 50bp long reads (adaptor sequences were included in the reads in addition to the miRNA sequences), with at least 4 consensus subreads required in the mapping.
Note that `-t' option should have a value of 1 since miRNA-seq reads are more similar to gDNA-seq reads than mRNA-seq reads from the read mapping point of vew.
\code{\\
subread-align -i mm10\_full\_index -n 35 -m 4 -M 3 -T 10 -I 0 -P 3 -B 10 \\
subread-align -t 1 -i mm10\_full\_index -n 35 -m 4 -M 3 -T 10 -I 0 -P 3 -B 10 \\
-r miRNA\_reads.fastq -o result.sam\\
}
......@@ -664,6 +678,8 @@ The multiple locations reported for the reads could be useful for investigating
The miRBase database (\url{http://www.mirbase.org/}) is a useful resource that includes annotations for miRNA genes in many species.
The {\featureCounts} program can be readily used for summarizing reads to miRNA genes.
\chapter{Read summarization}
\section{Introduction}
......@@ -679,16 +695,16 @@ Here we describe the {\featureCounts} program, an efficient and accurate read qu
{\featureCounts} has the following features:
\begin{itemize}
\item It carries out precise and accurate read assignments by taking care of indels, junctions and structural variants in the reads.
\item It takes only $\sim$1 minute to summarize 20 million read pairs of reads to 26 thousand RefSeq genes.
\item It supports GTF/SAF format annotation and SAM/BAM read data.
\item It supports strand-specific read summarization.
\item It can perform read summarization at both feature level (eg. exon level) and meta-feature level (eg. gene level).
\item It allows users to specify whether reads overlapping with more than one feature should be counted or not.
\item It takes only half a minute to summarize 20 million reads.
\item It supports GTF and SAF format annotation.
\item It supports strand-specific read counting.
\item It can count reads at feature (eg. exon) or meta-feature (eg. gene) level.
\item Highly flexible in counting multi-mapping and multi-overlapping reads. Such reads can be excluded, fully counted or fractionally counted.
\item It gives users full control on the summarization of paired-end reads, including allowing them to check if both ends are mapped and/or if the fragment length falls within the specified range.
\item It can discriminate the features that were overlapped by both ends of the fragment from the features that were overlapped by only one end of the same fragment to get more accurate read assignments.
\item Reduce ambuiguity in assigning read pairs by searching features that overlap with both reads from the pair.
\item It allows users to specify whether chimeric fragments should be counted.
\item It automatically detects the read input format (SAM or BAM).
\item It automatically re-order paired-end reads if reads belonging to the same pair are not adjacent to each other in input read files.
\item Automatically detect input format (SAM or BAM).
\item Automatically sort paired-end reads. Users can provide either location-sorted or name-sorted bams files to featureCounts. Read sorting is implemented on the fly and it only incurs minimal time cost.
\end{itemize}
\section{featureCounts}
......@@ -741,8 +757,8 @@ Chromosomal names included in the \code{Chr} column must match the chromosomal n
Reads may be paired or unpaired.
If paired reads are used, then each pair of reads defines a DNA or RNA fragment bookended by the two reads.
In this case, {\featureCounts} can be instructed to count fragments rather than reads.
{\featureCounts} automatically sorts reads by name if paired reads are not in consecutive positions in the SAM or BAM file.
Users do not need sort their paired reads before providing them to {\featureCounts}.
{\featureCounts} automatically sorts reads by name if paired reads are not in consecutive positions in the SAM or BAM file, with minimal cost.
Users do not need to sort their paired reads before providing them to {\featureCounts}.
\subsection{Features and meta-features}
......@@ -753,12 +769,30 @@ We recommend to use unique gene identifiers, such as NCBI Entrez gene identifier
\subsection{Overlap of reads with features}
{\featureCounts} preforms precise read assignment by comparing mapping location of every base in the read or fragment with the genomic region spanned by each feature. It takes account of any gaps (insertions, deletions, exon-exon junctions or structural variants) that are found in the read. It calls a hit if any overlap (1bp or more) is found between the read or fragment and a feature.
A hit is called for a meta-feature if the read or fragment overlaps any component feature of the meta-feature.
{\featureCounts} preforms precise read assignment by comparing mapping location of every base in the read or fragment with the genomic region spanned by each feature.
It takes account of any gaps (insertions, deletions, exon-exon junctions or structural variants) that are found in the read.
It calls a hit if any overlap is found between read and feature.
Users may use `--minOverlap (\code{minOverlap} in \R)' and `--fracOverlap (\code{fracOverlap} in \R)' options to specify the minimum number of overlapping bases and minimum fraction of overlapping bases requried for assigning a read to a feature, respectively.
The `--fracOverlap' option might be particularly useful for counting reads with variable lengths.
% A hit is called for a meta-feature if the read or fragment overlaps any component feature of the meta-feature.
\subsection{Multiple overlaps}
\subsection{Multi-mapping reads}
A multi-overlap read or fragment is one that overlaps more than one feature, or more than one meta-feature when summarizing at the meta-feature level. {\featureCounts} provides users with the option to either exclude multi-overlap reads or to count them for each feature that is overlapped. The decision whether or not to counting these reads is often determined by the experiment type. We recommend that reads or fragments overlapping more than one gene are not counted for RNA-seq experiments, because any single fragment must originate from only one of the target genes but the identity of the true target gene cannot be confidently determined. On the other hand, we recommend that multi-overlap reads or fragments are counted for most ChIP-seq experiments because epigenetic modifications inferred from these reads may regulate the biological functions of all their overlapping genes.
A multi-mapping read is a read that can be equally best mapped to more than one location in the reference genome.
Due to the mapping amguity, it is recommended that multi-mapping reads should be excluded from read counting (default behavior of {\featureCounts} program) to produce as accurate counts as possible.
However we do provide users with different options to deal with the counting of such reads.
Users can choose to discard multi-mapping reads, or fully count every alignment reported for a multi-mapping read (ie. each alignment carries 1 count) or count each alignment fractionally (ie. each alignment carries $1/n$ count where $n$ is the total number of alignments reported for the read).
Relevent parameters for counting multi-mapping reads include `-M' (\code{countMultiMappingReads} in \R) and `--fraction' (\code{fraction} in \R).
\subsection{Multi-overlap reads}
A multi-overlap read or fragment is one that overlaps more than one feature, or more than one meta-feature when summarizing at the meta-feature level. {\featureCounts} provides users with the option to exclude multi-overlap reads, or fully count them for each overlapping feature (ie. each overlapping feature receives a count of 1 from the read) or assign a fractional count to each overlapping feature.
Relevent parameters for counting multi-overlap reads include `-O' (\code{allowMultiOverlap} in \R) and `--fraction' (\code{fraction} in \R).
The decision whether or not to counting these reads is often determined by the experiment type. We recommend that reads or fragments overlapping more than one gene are not counted for RNA-seq experiments, because any single fragment must originate from only one of the target genes but the identity of the true target gene cannot be confidently determined. On the other hand, we recommend that multi-overlap reads or fragments are counted for most ChIP-seq experiments because epigenetic modifications inferred from these reads may regulate the biological functions of all their overlapping genes.
Note that, when counting at the meta-feature level, reads that overlap multiple features of the same meta-feature are always counted exactly once for that meta-feature, provided there is no overlap with any other meta-feature. For example, an exon-spanning read will be counted only once for the corresponding gene even if it overlaps with more than one exon.
......@@ -809,7 +843,7 @@ Table 3 describes the parameters used by the {\featureCounts} program.
\pagebreak
\begin{longtable}{|p{5cm}|p{11cm}|}
\multicolumn{2}{p{16cm}}{Table 3: arguments used by the {\featureCounts} program included in the SourceForge {\Subread} package.
\multicolumn{2}{p{16cm}}{Table 3: Arguments used by the {\featureCounts} program included in the SourceForge {\Subread} package in alphabetical order.
Arguments included in parenthesis are the equivalent parameters used by {\featureCounts} function in Bioconductor {\Rsubread} package.}
\endfirsthead
\hline
......@@ -819,7 +853,7 @@ input\_files \newline (\code{files}) & Give the names of input read files that i
\hline
-a $<input> \newline (\code{annot.ext, annot.inbuilt}) $ & Give the name of an annotation file. \\
\hline
-A \newline (\code{chrAliases}) & Give the name of a file that contains aliases of chromosome names. The file should be a comma delimited text file that includes two columns. The first column gives the chromosome names used in the annotation and the second column gives the chromosome names used by reads. This file should not contain header lines. Names included in this file are case sensitive.\\
-A \newline (\code{chrAliases}) & Provide a chromosome name alias file to match chr names in annotation with those in the reads. This should be a two-column comma-delimited text file. Its first column should include chr names in the annotation and its second column should include chr names in the reads. Chr names are case sensitive. No column header should be included in the file.\\
\hline
-B \newline (\code{requireBothEndsMapped}) & If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option should be used together with \code{-p} (or \code{isPairedEnd} in {\Rsubread} {\featureCounts}).\\
\hline
......@@ -835,7 +869,8 @@ input\_files \newline (\code{files}) & Give the names of input read files that i
\hline
-g $<input>$ \newline (\code{GTF.attrType}) & Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes) when GTF annotation is provided. `gene\_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization.\\
\hline
-G $<input>$ \newline (\code{genome}) & Provide the name of a FASTA-format file that includes the reference genome sequences. The reference genome provided here should be the same as the one used in read mapping.\\
-G $<input>$ \newline (\code{genome}) & Provide the name of a FASTA-format file that contains the reference sequences used in
read mapping that produced the provided SAM/BAM files. This optional argument can be used with '-J' option to improve read counting for junctions.\\
\hline
-J \newline (\code{juncCounts}) & Count the number of reads supporting each exon-exon junction. Junctions are identified from those exon-spanning reads (containing `N' in CIGAR string) in input data. For each junction, the reported data include number of supporting reads, genes that the junction belongs to, chromosomal coordinates of splice sites etc.\\
\hline
......@@ -851,9 +886,9 @@ input\_files \newline (\code{files}) & Give the names of input read files that i
\hline
-Q $<int>$ \newline (\code{minMQS}) & The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default.\\
\hline
-R & Output read assignment results for each read (or fragment if paired end). They are saved to a tab-delimited file that contains four columns including read name, status(assigned or the reason if not assigned), name of target feature/meta-feature and number of hits if the read/fragment is counted multiple times. Name of the file is the input file name added with a suffix `.featureCounts'.\\
-R \newline (\code{reportReads}) & Output detailed read assignment results for each read (or fragment if paired end). They are saved to a tab-delimited file that contains four columns including read name, status(assigned or the reason if not assigned), name of target feature/meta-feature and total number of hits if the read/fragment is counted multiple times. Names of output files are the same as input file names except a suffix string `.featureCounts' is added.\\
\hline
-s $<int>$ \newline (\code{isStrandSpecific}) & Indicate if strand-specific read counting should be performed. It has three possible values: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default. For paired-end reads, strand of the first read is taken as the strand of the whole fragment and FLAG field of the current read is used to tell if it is the first read in the fragment.\\
-s $<int>$ \newline (\code{isStrandSpecific}) & Indicate if strand-specific read counting should be performed. Acceptable values: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default. For paired-end reads, strand of the first read is taken as the strand of the whole fragment and FLAG field of the current read is used to tell if it is the first read in the fragment.\\
\hline
-t $<input>$ \newline (\code{GTF.featureType}) & Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default.\\
\hline
......@@ -861,13 +896,15 @@ input\_files \newline (\code{files}) & Give the names of input read files that i
\hline
-v & Output version of the program. \\
\hline
$--$countSplitAlignmentsOnly \newline (\code{splitOnly}) & If specified, only split alignments (CIGAR strings contain letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data. If exon-spanning reads need to be assigned to all their overlapping exons, `-f' and `-O' options should be provided as well.\\
$--$countSplit \newline AlignmentsOnly \newline (\code{splitOnly}) & If specified, only split alignments (CIGAR strings contain letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data. If exon-spanning reads need to be assigned to all their overlapping exons, `-f' and `-O' options should be provided as well.\\
\hline
$--$countNonSplitAlignmentsOnly \newline (\code{nonSplitOnly}) & If specified, only non-split alignments (CIGAR strings do not contain letter `N') will be counted. All the other alignments will be ignored.\\
$--$countNonSplit \newline AlignmentsOnly \newline (\code{nonSplitOnly}) & If specified, only non-split alignments (CIGAR strings do not contain letter `N') will be counted. All the other alignments will be ignored.\\
\hline
$--$donotsort \newline (\code{autosort}) & If specified, paired end reads will not be re-ordered even if reads from the same pair were found not to be next to each other in the input.\\
\hline
$--$fraction \newline (\code{fraction}) & If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by `NH' tag) reported for the read. This option must be used together with the `-M' option.\\
$--$fraction \newline (\code{fraction}) & Assign fractional counts to features. This option must be used together with '-M' or '-O' or both. When '-M' is specified, each reported alignment from a multi-mapping read (identified via `NH' tag) will carry a fractional count of 1/x, instead of 1 (one), where x is the total number of alignments reported for the same read. When '-O' is specified, each overlapping feature will receive a fractional count of 1/y, where y is the total number of features overlapping with the read. When both '-M' and '-O' are specified, each alignment will carry a fraction count of 1/(x*y).\\
\hline
$--$fracOverlap $<value>$ \newline (\code{fracOverlap}) & Minimum fraction of overlapping bases in a read that is required for read assignment. Value should be within range [0,1]. 0 by default. If paired end, number of overlapping bases is counted from both reads. Soft-clipped bases are counted when calculating total read length (but ignored when counting overlapping bases). Both this option and `--minOverlap' option need to be satisfied for read assignment. \\
\hline
$--$ignoreDup \newline (\code{ignoreDup}) & If specified, reads that were marked as duplicates will be ignored. Bit Ox400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read.\\
\hline
......@@ -875,7 +912,7 @@ $--$largestOverlap \newline (\code{largestOverlap}) & If specified, reads (or fr
\hline
$--$maxMOp $<int>$ \newline (\code{maxMOp}) & Specify the maximum number of `M' operations (matches or mis-matches) allowed in a CIGAR string. 10 by default. Both `X' and `=' operations are treated as `M' and adjacent `M' operations are merged in the CIGAR string. When the number of `M' operations exceeds the limit, only the first `maxMOp' number of `M' operations will be used in read assignment.\\
\hline
$--$minOverlap $<int>$ \newline (\code{minOverlap}) & Specify the minimum required number of overlapping bases between a read (or a fragment) and an overlapping feature. 1 by default. If a negative value is provided, the read will be extended from both ends.\\
$--$minOverlap $<int>$ \newline (\code{minOverlap}) & Minimum number of overlapping bases in a read that is required for read assignment. 1 by default. If a negative value is provided, then a gap of up to specified size will be allowed between read and the feature that the read is assigned to. For assignment of read pairs (fragments), number of overlapping bases from each read from the same pair will be summed. \\
\hline
$--$primary \newline (\code{primaryOnly}) & If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter they are from multi-mapping reads or not (ie. `-M' is ignored).\\
\hline
......@@ -885,6 +922,8 @@ $--$readExtension5 $<int>$ \newline (\code{readExtension5}) & Reads are extended
\hline
$--$readExtension3 $<int>$ \newline (\code{readExtension3}) & Reads are extended downstream by $<int>$ bases from their 3' end. 0 by default.\\
\hline
$--$tmpDir $<string>$ \newline (\code{tmpDir}) & Directory under which intermediate files are saved (later removed). By default, intermediate files will be saved to the directory specified in `-o' argument (In \R, intermediate files are saved to the current working directory by default).\\
\hline
\end{longtable}
\pagebreak
......@@ -1026,7 +1065,7 @@ The complete list of parameters used by \code{exactSNP} can be found in Table 4.
\begin{longtable}{|p{4.5cm}|p{11cm}|}
\multicolumn{2}{p{16cm}}{Table 4: arguments used by the \code{exactSNP} program included in the SourceForge {\Subread} package.
\multicolumn{2}{p{16cm}}{Table 4: Arguments used by the \code{exactSNP} program included in the SourceForge {\Subread} package in alphabetical order.
Arguments included in parenthesis are the equivalent parameters used by \code{exactSNP} function in Bioconductor {\Rsubread} package.}
\endfirsthead
\hline
......
all:
@echo
@echo " Subread currently supports Linux, Mac OS X, FreeBSD and Solaris. Please choose the correct Makefile to build Subread."
@echo " Subread currently supports Linux, Mac OS X and FreeBSD. Please choose the correct Makefile to build Subread."
@echo
@echo " For building subread in Linux, please run ' make -f Makefile.Linux '."
@echo " For building subread in Mac OS X, please run ' make -f Makefile.MacOS '."
......
......@@ -21,7 +21,7 @@ all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation finished. #"
@echo "# Installation complete. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
......
......@@ -20,7 +20,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation finished. #"
@echo "# Installation complete. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
......
......@@ -18,7 +18,7 @@ all: repair featureCounts removeDup exactSNP subread-buildindex subindel subrea
@echo
@echo "###########################################################"
@echo "# #"
@echo "# Installation finished. #"
@echo "# Installation complete. #"
@echo "# #"
@echo "# Generated executables were copied to directory ../bin/ #"
@echo "# #"
......
......@@ -156,7 +156,8 @@ double fisher_exact_test(int a, int b, int c, int d)
if(1){
double ret = fast_fisher_test_one_side(a,b,c,d, precalculated_factorial, PRECALCULATE_FACTORIAL);
//SUBREADprintf("FISHER_RES %d %d %d %d %.9f %.9f\n", a,b,c,d, ret, log(ret));
//#warning ">>>>>> FOR ACCURACY MEASUREMENT ONLY <<<<<<<<"
//SUBREADprintf("FISHER_RES %d %d %d %d %.19G %.19G\n", a,b,c,d, ret, log(ret));
return ret;
}else{
int AA=a, BB=b, CC=c, DD=d;
......@@ -1735,6 +1736,11 @@ int main_snp_calling_test(int argc,char ** argv)
}
}
if(argc > optind){
SUBREADprintf("Invalid parameter '%s'\n", argv[optind]);
return -1;
}
if(out_BED_file[0]==0 || in_FASTA_file[0]==0 || (parameters.pile_file_name [0] == 0 && in_SAM_file[0]==0))
{
SUBREADprintf("The names of the input file, the output file and the reference sequence file must be specified using -i, -o and -g options.\n");
......
......@@ -1447,10 +1447,11 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
if(first_correct_base < last_correct_base || first_correct_base > last_correct_base + 3000)
SUBREADprintf("WRONG ORDER: F=%u, L=%d\n", first_correct_base , last_correct_base);
//int last_second_part_base = find_subread_end(read_len, global_context->config.total_subreads , next_correct_subread_last) ;
if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0)
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541", read_name) == 0)
//if(current_result->selected_position > 433897 - 100 && current_result->selected_position < 433897)
SUBREADprintf("INDEL_P03: I=%d; INDELS=%d; POS=%u; COVER=%d -- %d\n", i, indels, current_result->selected_position, last_correct_subread, next_correct_subread);
SUBREADprintf("INDEL_P03: I=%d; INDELS=%d; POS=%u; COVER=%d -- %d (vote_no : %d - %d)\n", i, indels, current_result->selected_position, last_correct_base, first_correct_base, last_correct_subread, next_correct_subread);
if(global_context -> config.use_dynamic_programming_indel || read_len > EXON_LONG_READ_LENGTH)
{
......@@ -1467,7 +1468,8 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
movement_buffer[dyna_steps]=0;
if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0)
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541", read_name) == 0)
{
SUBREADprintf("IR= %d %d~%d\n", dyna_steps, last_correct_base, first_correct_base);
......@@ -1518,20 +1520,26 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
// let's test if it is ambiguous:
gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
int ambiguous_i, ambiguous_count=0;
int best_matched_bases = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) +
match_chro(read_text + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type);
for(ambiguous_i=-5; ambiguous_i<=5; ambiguous_i++)
{
int left_match = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6+ambiguous_i, 0, global_context->config.space_type);
int right_match = match_chro(read_text + cursor_on_read + ambiguous_i - min(current_indel_len,0), current_value_index, indel_left_boundary + ambiguous_i + max(0, current_indel_len), 6-ambiguous_i, 0,global_context->config.space_type);
if(left_match+right_match == best_matched_bases) ambiguous_count ++;
//#warning ">>>>>>> COMMENT NEXT BLOCK IN RELEASE <<<<<<<<"
if(0 && FIXLENstrcmp("D00491:277:C89FUANXX:7:1110:20418:31541",read_name ) == 0){
SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%ul READ_CUR=%d\n",i, current_indel_len, pair_number, indel_left_boundary-1, cursor_on_read);
}
//#warning "=========== COMMENT THIS LINE ==============="
if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0)
SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%u\n",i, current_indel_len, pair_number, indel_left_boundary-1);
int ambiguous_count=0;
//#warning " >>>>>>>> MAKE SURE DISABLING THE NEXT BLOCK IS HARMLESS <<<<<<<<< "
if(0){
int ambiguous_i, best_matched_bases = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) +
match_chro(read_text + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type);
for(ambiguous_i=-5; ambiguous_i<=5; ambiguous_i++)
{
int left_match = match_chro(read_text + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6+ambiguous_i, 0, global_context->config.space_type);
int right_match = match_chro(read_text + cursor_on_read + ambiguous_i - min(current_indel_len,0), current_value_index, indel_left_boundary + ambiguous_i + max(0, current_indel_len), 6-ambiguous_i, 0,global_context->config.space_type);
if(left_match+right_match == best_matched_bases) ambiguous_count ++;
}
}
if(abs(current_indel_len)<=global_context -> config.max_indel_length)
{
chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, read_text + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0);
......
......@@ -499,6 +499,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
}
}
if(argc > optind){
SUBREADprintf("Invalid parameter '%s'\n", argv[optind]);
return -1;
}
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
if(global_context->config.more_accurate_fusions)
{
......
......@@ -538,6 +538,12 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
}
}
if(argc > optind){
SUBREADprintf("Invalid parameter '%s'\n", argv[optind]);
return -1;
}
if(global_context->config.is_SAM_file_input) global_context->config.phred_score_format = FASTQ_PHRED33;
global_context->config.more_accurate_fusions = global_context->config.more_accurate_fusions && global_context->config.do_fusion_detection;
......
......@@ -2012,7 +2012,7 @@ int find_donor_receptor(global_context_t * global_context, thread_context_t * th
char out1pos[100];
absoffset_to_posstr(global_context, search_in_chro_start, out1pos);
if(1 || FIXLENstrcmp("chr14:105",out1pos)==0){
if(0 && FIXLENstrcmp("chr14:105",out1pos)==0){
SUBREADprintf("POS=%s\t\tINS=%d\t\t%s\n", out1pos, best_insertion_in_between, rname);
SUBREADprintf("R= %s\nS1=%s%s\nS2=%s%s\n %s|%s|\n\n", rtext, sp1s, chro_bases_startside, sp1s, chro_bases_endside, spE, spBB);
}
......
......@@ -63,6 +63,25 @@ void core_version_number(char * program)
SUBREADprintf("\n%s v%s\n\n" , program, SUBREAD_VERSION);
}
int is_valid_float(char * optarg, char * optname){
int xk1=0;
while(1){
int nch = optarg[xk1++];
if(!nch){
if(xk1 == 1){
SUBREADprintf("Value for argumant %s-%s is missing.\n", optname[1]?"-":"", optname);
return 0;
}
break;
}
if(( nch!='-' || xk1 > 1 ) && nch != '.' && !isdigit(nch)){
SUBREADprintf("Value for argumant %s-%s is not a valid number: '%s'\n", optname[1]?"-":"", optname, optarg);
return 0;
}
}
return 1;
}
int is_valid_digit(char * optarg, char * optname){
int xk1=0;
while(1){
......@@ -1187,10 +1206,6 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
is_r_OK = (current_result -> mapping_result -> result_flags & CORE_IS_FULLY_EXPLAINED) > 0;
if(is_r_OK)
if((current_result -> mapping_result -> result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
is_r_OK = 0;
if(0 && FIXLENstrcmp("V0112_0155:7:1101:5279:29143#ATCACG", read_name) == 0)
SUBREADprintf("%s R_%d CPOINT1 : is_OK=%d\n", read_name , is_second_read + 1 , is_r_OK);
......@@ -1205,9 +1220,13 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
r->is_first_section_jumpped = is_first_section_jumped;
r->linear_position = current_result -> first_base_position;
r->mapping_quality = current_result -> final_quality;
if(current_result -> mapping_result -> result_flags & CORE_IS_BREAKEVEN)
r ->mapping_quality = 0;
//r->mapping_quality = current_result -> final_quality;
r->mapping_quality = 40; // changed on 10-JUN-2016
if(current_result -> realign_flags & CORE_IS_BREAKEVEN)
r -> mapping_quality = 0;
else
r -> mapping_quality /= current_result->MAPQ_adjustment;
//SUBREADprintf("REP=%d\n", current_result->MAPQ_adjustment);
strcpy(r->cigar, r -> current_cigar_decompress);
r->strand = (current_result -> mapping_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
......@@ -1219,7 +1238,7 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
if(global_context -> config.do_fusion_detection)
{
chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens);
chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens, read_name);
if(chimeric_sections > 0){
int xk1;
......@@ -2543,6 +2562,7 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
//memset(final_MATCH_buffer, 0, sizeof(int) * 2 * global_context -> config.multi_best_reads);
//memset(final_MISMATCH_buffer, 0, sizeof(int) * 2 * global_context -> config.multi_best_reads);
int r1_candidate_locations = 0, r2_candidate_locations = 0;
int r1_step2_locations = 0, r2_step2_locations = 0;
clear_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count);
for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
......@@ -2594,6 +2614,14 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
current_result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
if(is_second_read) r2_step2_locations = best_read_id + 1;
else r1_step2_locations = best_read_id + 1;
if(0 && FIXLENstrcmp("V0112_0155:7:1101:7921:2517#ACTTGA", read_name_1)==0){
SUBREADprintf("R1 N2=%d, R2 N2=%d, R%d : pos=%u\n", r1_step2_locations, r2_step2_locations, is_second_read+1, current_result->selected_position);
}
unsigned int final_alignments = explain_read(global_context, thread_context , final_realignments + (is_second_read + 2 * best_read_id) * MAX_ALIGNMENT_PER_ANCHOR,
current_read_number, current_rlen, current_read_name, current_read, current_qual, is_second_read, best_read_id, is_negative_strand);
......@@ -2641,9 +2669,9 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
unsigned int scores_array [global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR];
for(read_record_i = 0; read_record_i < current_candidate_locations; read_record_i++){
realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
mapping_result_t *current_result = current_realignment_result -> mapping_result;
assert(current_result -> result_flags & CORE_IS_FULLY_EXPLAINED);
//realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
//mapping_result_t *current_result = current_realignment_result -> mapping_result;
//assert(current_result -> result_flags & CORE_IS_FULLY_EXPLAINED);
unsigned int this_MATCH = current_MATCH_buffer[read_record_i];
unsigned int this_MISMATCH = current_MISMATCH_buffer[read_record_i];
......@@ -2682,6 +2710,8 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
}
if(highest_score_occurence<2 || global_context -> config.report_multi_mapping_reads){
int is_break_even = 0;
if(highest_score_occurence>1) is_break_even = 1;
highest_score_occurence = min(highest_score_occurence, global_context -> config.reported_multi_best_reads);
for(read_record_i = 0; read_record_i < current_candidate_locations ; read_record_i++){
......@@ -2694,6 +2724,8 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
strcpy(qual_text_1, raw_qual_text_1);
strcpy(qual_text_2, raw_qual_text_2);
if(is_break_even) current_realignment_result -> realign_flags |= CORE_IS_BREAKEVEN;
current_realignment_result -> MAPQ_adjustment = current_MISMATCH_buffer [read_record_i] + ( is_second_read?(r2_step2_locations): (r1_step2_locations));
//if(161430 <= current_read_number) SUBREADprintf("ALL_SE=%d, THIS_HIT=%d\n", highest_score_occurence, output_cursor);
//if(161436 == current_read_number)SUBREADprintf("DOUBLE_ADD_SE for %d (%p): %u %d/%d, BEST=%d\n", scores_array[read_record_i] , current_realignment_result, current_read_number, output_cursor , highest_score_occurence, best_score_highest);
......@@ -2793,6 +2825,10 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
//SUBREADprintf("Highest score = %llu, Occurance = %d\n", highest_score , highest_score_occurence);
// Then, copy the (R1, R2) that have the highest score into the align_res buffer.
if(highest_score_occurence <= 1 || global_context -> config.report_multi_mapping_reads){
int is_break_even = 0;
if(highest_score_occurence>1)is_break_even = 1;
highest_score_occurence = min(highest_score_occurence, global_context -> config.reported_multi_best_reads);
for(r1_best_id = 0; r1_best_id < r1_candidate_locations; r1_best_id ++)
{
......@@ -2814,7 +2850,15 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
strcpy(qual_text_1, raw_qual_text_1);
strcpy(qual_text_2, raw_qual_text_2);
//if(161436 == current_read_number)SUBREADprintf("DOUBLE_ADD_PE: %u %d/%d\n", current_read_number, output_cursor , highest_score_occurence);
if(is_break_even){
r1_realign -> realign_flags |= CORE_IS_BREAKEVEN;
r2_realign -> realign_flags |= CORE_IS_BREAKEVEN;
}
r1_realign -> MAPQ_adjustment = r1_step2_locations + final_MISMATCH_buffer1[r1_best_id];
r2_realign -> MAPQ_adjustment = r2_step2_locations + final_MISMATCH_buffer2[r2_best_id];
//SUBREADprintf("R1R2_Rep = %d,%d\n", r1_step2_locations,r2_step2_locations);
write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, r1_realign, r2_realign, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor, non_informative_subreads_r1, non_informative_subreads_r2);
output_cursor ++;
}
......@@ -3068,7 +3112,8 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
{
//SUBREADprintf("P%d %llu %s\n", is_reversed, current_read_number, read_name_1);
if(0 && FIXLENstrcmp("R003577537", read_name_1) ==0 ) {
//#warning ">>>>>>> DISABLE THE FOLLOING BLOCK <<<<<<"
if(0 && FIXLENstrcmp("V0112_0155:7:1101:7921:2517#ACTTGA", read_name_1) ==0 ) {
SUBREADprintf(">>>%llu<<<\n%s [%d] %s\n%s [%d] %s\n", current_read_number, read_name_1, read_len_1, read_text_1, read_name_2, read_len_2, read_text_2);
SUBREADprintf(" ======= PAIR %s = %llu ; NON_INFORMATIVE = %d, %d =======\n", read_name_1, current_read_number, vote_1 -> noninformative_subreads, vote_2 -> noninformative_subreads);
print_votes(vote_1, global_context -> config.index_prefix);
......@@ -4134,11 +4179,11 @@ void absoffset_to_posstr(global_context_t * global_context, unsigned int pos, ch
sprintf(res, "%s:%u", ch, off);
}
int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * perfect_lens)
int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * perfect_lens, char * read_name)
{
unsigned int current_perfect_map_start = sel_pos;
int current_perfect_section_no = 0;
int current_perfect_cursor = sel_pos;
unsigned int current_perfect_cursor = sel_pos;
int is_reversed = is_first_section_reversed;
int is_negative = is_first_section_negative_strand;
int read_cursor = 0;
......@@ -4204,15 +4249,22 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
assert(new_chr);
is_chro_jump = (curr_chr != new_chr);
long long int dist = current_perfect_cursor;
dist -= jummped_location;
long long int dist = (long long int)current_perfect_cursor;
dist -= (long long int)jummped_location;
if(abs(dist) >= global_context -> config.maximum_intron_length)
is_long_jump = 1;
//#warning ">>>>>> COMMENT WHEN RELEASE <<<<<<"
if(0 && FIXLENstrcmp("R000000007", read_name)==0)
SUBREADprintf("dist=%lld, abs=%lld, %u, %u\n",dist,abs(dist), current_perfect_cursor, jummped_location);
// A long jump is the jump longer than 2^27.
// Picard does not like it!!
}
// is_long_jump is true only if the two sections are on different chromosomes.
//#warning ">>>>>> COMMENT WHEN RELEASE <<<<<<"
if(0 && FIXLENstrcmp("R000000007", read_name)==0)
SUBREADprintf("CHR_JMP=%d, NCCH=%c, LONG_JMP=%d\n", is_chro_jump, ncch, is_long_jump);
if(is_chro_jump || islower(ncch) || ncch == 'B' || is_long_jump)
{
current_perfect_cursor = jummped_location;
......
......@@ -372,7 +372,7 @@ typedef struct{
short realign_flags;
short final_quality;
short chromosomal_length;
short MAPQ_adjustment;
} realignment_result_t;
#define BUCKETED_TABLE_INIT_ITEMS 3
......@@ -619,7 +619,7 @@ unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_conte
// The first base in the read actually has a larger coordinate than Pos.
unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar);
int chimeric_cigar_parts(global_context_t * global_context , unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * out_read_lens);
int chimeric_cigar_parts(global_context_t * global_context , unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * out_read_lens, char * read_name);
void warning_file_limit();
void quick_sort(void * arr,int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r));
......@@ -637,5 +637,6 @@ int FIXLENstrcmp(char * fixed_len, char * rname);
int is_valid_digit(char * optarg, char * optname);
int is_valid_digit_range(char * optarg, char * optname, int min, int max_inc);
int is_valid_float(char * optarg, char * optname);
int exec_cmd(char * cmd, char * outstr, int out_limit);
#endif
......@@ -888,9 +888,12 @@ int match_chro(char * read, gene_value_index_t * index, unsigned int pos, int te
case 'C':
ret += tt==2;
break;
case 0:
//SUBREADprintf("NON-ATGC-CHAR:%d\n", tv);
//assert(0);
break;
default:
ret += tt==3;
break;
}
offset_bit+=2;
......