Skip to content
Commits on Source (7)
......@@ -831,10 +831,6 @@ int main(int argc, char *argv[])
std::cout << BlasrHelp(params) << std::endl;
std::exit(EXIT_SUCCESS); // Not a failure.
}
if (params.printDiscussion) {
std::cout << BlasrDiscussion();
std::exit(EXIT_SUCCESS); // Not a failure.
}
if (argc < 3) {
std::cout << BlasrConciseHelp();
std::exit(EXIT_FAILURE); // A failure.
......
// Copyright (c) 2014-2015, Pacific Biosciences of California, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted (subject to the limitations in the
// disclaimer below) provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// * Neither the name of Pacific Biosciences nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
// GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
// BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
// USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
Copyright (c) 2014-2018, Pacific Biosciences of California, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted (subject to the limitations in the
disclaimer below) provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Pacific Biosciences nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
......@@ -6,23 +6,15 @@
***
## Availability
The latest release binary can be installed via [bioconda](https://bioconda.github.io/):
The latest version can be installed via the bioconda package `blasr`.
conda install -c bioconda blasr
Official support is only provided for official and stable
[SMRT Analysis builds](http://www.pacb.com/products-and-services/analytical-software/)
provided by PacBio and not for source builds.
Unofficial support for binary releases is provided via github issues,
not via mail to developers.
**No** support for source builds; issues will be closed without comment.
Please refer to our [official pbbioconda page](https://github.com/PacificBiosciences/pbbioconda)
for information on Installation, Support, License, Copyright, and Disclaimer.
## Documentation
- [Wiki: all you need to know](https://github.com/PacificBiosciences/blasr/wiki)
- [Running BLASR](doc/EXAMPLES.md)
- [Developer environment](doc/DEVELOPER.md)
DISCLAIMER
----------
......
......@@ -10,6 +10,7 @@ Test blasr with --bam
$ rm -rf $OUTDIR/ecoli_subset.sam
$ $BLASR_EXE $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/ecoli_subset.bam --nproc 15
[INFO]* (glob)
WARNING, BAM output without PacBio BAM or DATASET input is not supported by PacBio.
[INFO]* (glob)
$ $SAMTOOLS_EXE view -h $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam
......
Test input query is not pb bam, output is sam/bam, alignment has more than 65535 cigar ops.
Set up
$ mkdir -p $OUTDIR
$ Q=$DATDIR/long-cigar/query.fasta
$ T=$DATDIR/long-cigar/ref.fasta
Test sam output
$ O=$OUTDIR/long-cigar.sam
$ $BLASR_EXE $Q $T --minMatch 15 --maxMatch 25 --advanceHalf --advanceExactMatches 10 --bestn 1 --nproc 1 --noSplitSubreads --out $O 1>/dev/null 2>/dev/null && echo $?
0
$ grep 'CG:' $O |wc -l
1
......@@ -32,6 +32,7 @@ blasr_fast_test_list = [
['holeNumbers', 'INTERMEDIATE'],
# ['bug25328', 'SLOW'],
# ['pgc-big', 'SLOW'],
# ['long-cigar', 'SLOW'],
]
blasr_test_remotedir = '/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest'
......
......@@ -53,6 +53,7 @@ Test bam output
$ O=$OUTDIR/pgc-naive.bam
$ $BLASR_EXE $Q $T --bam --out $O --bestn 1 --placeGapConsistently && echo $?
[INFO]* (glob)
WARNING, BAM output without PacBio BAM or DATASET input is not supported by PacBio.
[INFO]* (glob)
0
$ $SAMTOOLS_EXE view $O
......
blasr (5.3.3+dfsg-1) unstable; urgency=medium
* Afif removed himself from Uploaders (thanks for your work on this)
* Add myself to Uploaders
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
-- Andreas Tille <tille@debian.org> Wed, 31 Jul 2019 14:55:41 +0200
blasr (5.3.2+dfsg-1.1) unstable; urgency=medium
* Non-maintainer upload.
......
Source: blasr
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
python,
meson,
pkg-config,
libboost-dev,
libhdf5-dev,
zlib1g-dev,
libpbseq-dev (>= 0~20160912),
libpbseq-dev,
libpbbam-dev (>= 0.18.0~),
libpbdata-dev,
libgtest-dev,
libblasr-dev (>= 5.3.1+dfsg-2)
Standards-Version: 4.3.0
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/blasr
Vcs-Git: https://salsa.debian.org/med-team/blasr.git
Homepage: https://github.com/PacificBiosciences/blasr
......@@ -23,7 +24,7 @@ Package: blasr
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
libpbseq (>= 0~20160912)
libpbseq
Description: mapping single-molecule sequencing reads
Basic local alignment with successive refinement (BLASR) is a method
for mapping single-molecule sequencing reads against a reference genome.
......
## Running BLASR
Typing 'blasr -h' or 'blasr -help' on the command line will give you a
list of options. At the least, provide a fasta, fastq, or bas.h5 file,
and a genome.
list of options. At the least, provide reads and a genome, where the reads file format can be
bam|dataset|fasta|fastq|bas.h5, and the genome file format can be fasta|ReferenceSet.
### Some typical use cases
Align reads from reads.bas.h5 to ecoli_K12 genome, and output in SAM format.
Align subreads in movie.subreads.bam to ecoli_K12 genome, and output in BAM format.
blasr reads.bas.h5 ecoli_K12.fasta -sam
blasr movie.subreads.bam ecoli_K12.fasta --bam --out alignments.bam
Same as above, but with soft clipping
Align subreads in movie.subreadset.xml to ecoli_K12 genome, and output in BAM format.
blasr movie.subreadset.xml ecoli_K12.fasta --bam --out alignments.bam
Align subreads in movie.subreadset.xml to ecoli_K12 genome ReferenceSet, and output in BAM format.
blasr movie.subreadset.xml ecoli_K12.referenceset.xml --bam --out alignments.bam
blasr reads.bas.h5 ecoli_K12.fasta -sam -clipping soft
Align CCS reads in movie.consensusreadset.xml to ecoli_K12 genome, and output in BAM format.
Use multiple threads
blasr movie.consensusreadset.xml ecoli_K12.fasta --bam --out alignments.bam
blasr reads.bas.h5 ecoli_K12.fasta -sam -clipping soft -out alignments.sam -nproc 16
Use multiple threads, e.g., 16 threads
blasr movie.subreads.bam ecoli_K12.fasta --nproc 16
Include a larger minimal match, for faster but less sensitive alignments
blasr reads.bas.h5 ecoli_K12.fasta -sam -clipping soft -minMatch 15
blasr movie.subreads.bam ecoli_K12.fasta --minMatch 15
Produce alignments in a pairwise human readable format
blasr reads.bas.h5 ecoli_K12.fasta -m 0
blasr movie.subreads.bam ecoli_K12.fasta -m 0
Use a precomputed suffix array for faster startup
sawriter hg19.fasta.sa hg19.fasta #First precompute the suffix array
blasr reads.bas.h5 hg19.fasta -sa hg19.fasta.sa
sawriter ecoli_K12.fasta.sa ecoli_K12.fasta #First precompute the suffix array
blasr movie.subreads.bam ecoli_K12.fasta --sa ecoli_K12.fasta.sa
Align RSII reads from reads.bas.h5 to ecoli_K12 genome, and output in SAM format.
Use a precomputed BWT-FM index for smaller runtime memory footprint, but slower alignments.
blasr reads.bas.h5 ecoli_K12.fasta --sam --out alignments.sam
sa2bwt hg19.fasta hg19.fasta.sa hg19.fasta.bwt
blasr reads.bas.h5 hg19.fasta -bwt hg19.fasta.bwt
Same as above, but with soft clipping
blasr reads.bas.h5 ecoli_K12.fasta --sam --clipping soft --out alignments.sam
......@@ -168,7 +168,6 @@ public:
bool useQVScore;
int scoreType;
bool printVerboseHelp;
bool printDiscussion;
float sdpBypassThreshold;
bool computeAlignProbability;
float qvMatchWeight;
......@@ -346,7 +345,6 @@ public:
guidedAlignBandSize = 10;
useQVScore = false;
printVerboseHelp = false;
printDiscussion = false;
sdpBypassThreshold = 1000000.0;
scoreType = 0;
byAdapter = false;
......@@ -666,7 +664,7 @@ public:
// Only support two clipping methods: soft or subread.
clipping = SAMOutput::subread;
}
// Turn on fa fa -> bam pipe
// Turn on fa fa -> sam pipe
/*
if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) {
// bax|fasta|fastq -> bam paths are turned off by default
......@@ -674,6 +672,7 @@ public:
std::exit(EXIT_FAILURE);
}
*/
if (outFileName == "") {
std::cout << "ERROR, SAM output file must be specified." << std::endl;
std::exit(EXIT_FAILURE);
......@@ -701,14 +700,14 @@ public:
// Only support two clipping methods: soft or subread.
clipping = SAMOutput::subread;
}
// Turn on fa fa -> bam pipe
/*
if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) {
// bax|fasta|fastq -> bam paths are turned off by default
std::cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM or DATASET files." << std::endl;
std::exit(EXIT_FAILURE);
if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and
queryFileType != FileType::HDFCCS and queryFileType != FileType::HDFBase and
queryFileType != FileType::HDFPulse) {
// fasta|fastq -> bam paths are turned off by default
std::cout << "WARNING, BAM output without PacBio BAM or DATASET input is not "
"supported by PacBio."
<< std::endl;
}
*/
if (outFileName == "") {
std::cout << "ERROR, BAM output file must be specified." << std::endl;
std::exit(EXIT_FAILURE);
......
......@@ -188,7 +188,7 @@ void RegisterBlasrOptions(CommandLineParser& clp, MappingParameters& params)
clp.RegisterIntOption("-scoreType", &params.scoreType, "",
CommandLineParser::NonNegativeInteger);
clp.RegisterFlagOption("h", &params.printVerboseHelp, "");
clp.RegisterFlagOption("-help", &params.printDiscussion, "");
clp.RegisterFlagOption("-help", &params.printVerboseHelp, "");
clp.RegisterFloatOption("-accuracyPrior", &params.readAccuracyPrior, "",
CommandLineParser::NonNegativeFloat);
// holeNumberRangesStr is a string of comma-delimited hole number ranges, such as '1,2,3,10-15'.
......@@ -225,7 +225,7 @@ void RegisterBlasrOptions(CommandLineParser& clp, MappingParameters& params)
const std::string BlasrHelp(MappingParameters& params)
{
std::stringstream helpStream;
std::ostringstream helpStream;
helpStream
<< " Options for blasr " << std::endl
<< " Basic usage: 'blasr reads.{bam|fasta|bax.h5|fofn} genome.fasta [-options] "
......@@ -629,76 +629,9 @@ const std::string BlasrHelp(MappingParameters& params)
<< "Bioinformatics 2012, 13:238." << std::endl
<< "Please report any bugs to "
<< "'https://github.com/PacificBiosciences/blasr/issues'." << std::endl
<< std::endl;
return helpStream.str();
}
// Builds the short usage banner for blasr: the program name, the minimal
// usage line, pointers to -h / --help, and the notice about the
// single-dash/double-dash option convention.
// Returns the banner as one string; every line, including the two blank
// separator lines, is terminated by '\n'.
const std::string BlasrConciseHelp(void)
{
    // One entry per output line; an empty entry yields a blank line.
    static const char* const bannerLines[] = {
        "blasr - a program to map reads to a genome",
        " usage: blasr reads genome ",
        " Run with -h for a list of commands ",
        " --help for verbose discussion of how to run blasr.",
        "",
        "In release v5.1 of BLASR, command-line options will use the ",
        "single dash/double dash convention: ",
        "Character options are preceded by a single dash. (Example: -v) ",
        "Word options are preceded by a double dash. (Example: --verbose) ",
        "Please modify your scripts accordingly when BLASR v5.1 is released. ",
        "",
    };

    std::string banner;
    for (const char* text : bannerLines) {
        banner += text;
        banner += '\n';
    }
    return banner;
}
// Builds the summary help text: the basic usage line, the option-description
// legend, and a short description of each accepted read input format
// (bam, fasta, bax.h5/plx.h5, fofn).
// Returns the text as one string; each paragraph is a single line ending in
// '\n', and the text ends with a blank line.
const std::string BlasrSummaryHelp(void)
{
    std::ostringstream ss;
    ss << " Basic usage: 'blasr reads.{bam|fasta|bax.h5|fofn} genome.fasta [-options] "
       << '\n'
       << " [option]\tDescription (default_value)." << '\n'
       << '\n'
       << " Input Files." << '\n'
       // Adjacent literals are concatenated verbatim, so every fragment that
       // continues a sentence must carry its own separating space.
       << " reads.bam is the NEW native output format for SMRT reads. "
          "This is the preferred input to blasr because rich quality "
          "value (insertion,deletion, and substitution quality values) information is "
          "maintained. The extra quality information improves variant detection and mapping "
          "speed."
       << '\n'
       << " reads.fasta is a multi-fasta file of reads. While any fasta file is valid input, "
          "it is preferable to use bax.h5 or plx.h5 files because they contain "
          "more rich quality value information."
       << '\n'
       << " reads.bax.h5|reads.plx.h5 is the OLD (DEPRECATED) output format of "
          "SMRT reads. "
       << '\n'
       << " reads.fofn File of file names accepted." << '\n'
       << '\n';
    return ss.str();
}
const std::string BlasrDiscussion(void)
{
std::stringstream ss;
ss << "NAME" << std::endl
<< " blasr - Map SMRT Sequences to a reference genome." << std::endl
<< std::endl
<< "SYNOPSIS" << std::endl
<< " blasr reads.bam genome.fasta --bam --out out.bam" << std::endl
<< std::endl
<< " blasr reads.fasta genome.fasta " << std::endl
<< std::endl
<< " blasr reads.fasta genome.fasta --sa genome.fasta.sa" << std::endl
<< std::endl
<< " blasr reads.bax.h5 genome.fasta [--sa genome.fasta.sa] " << std::endl
<< std::endl
<< " blasr reads.bax.h5 genome.fasta --sa genome.fasta.sa --maxScore 100 --minMatch "
"15 ... "
<< std::endl
<< std::endl
<< " blasr reads.bax.h5 genome.fasta --sa genome.fasta.sa --nproc 24 --out "
"alignment.out ... "
<< std::endl
<< std::endl
<< "DESCRIPTION " << std::endl
<< "DISCUSSION " << std::endl
<< " blasr is a read mapping program that maps reads to positions " << std::endl
<< " in a genome by clustering short exact matches between the read and" << std::endl
<< " the genome, and scoring clusters using alignment. The matches are" << std::endl
......@@ -720,16 +653,9 @@ const std::string BlasrDiscussion(void)
<< " output format is PacBio BAM." << std::endl
<< " Support to bax.h5 and plx.h5 files will be DEPRECATED." << std::endl
<< " Support to region tables for h5 files will be DEPRECATED." << std::endl
//<< " Read filtering information is contained in the .bax.h5 input files as" << std::endl
//<< " well as generated by other post-processing programs with analysis of" << std::endl
//<< " pulse files and read in from a separate .region.h5 file. The current" << std::endl
//<< " set of filters that are applied to reads are high quality region" << std::endl
//<< " filtering, and adapter filtering. Regions outside high-quality" << std::endl
//<< " regions are ignored in mapping. Reads that contain regions annotated" << std::endl
//<< " as adapter are split into non-adapter (template) regions, and mapped" << std::endl
//<< " separately." << std::endl
<< " " << std::endl
<< " When suffix array index of a genome is not specified, the suffix array is" << std::endl
<< " When suffix array index of a genome is not specified, the suffix array is"
<< std::endl
<< " built before producing alignment. This may be prohibitively slow" << std::endl
<< " when the genome is large (e.g. Human). It is best to precompute the" << std::endl
<< " suffix array of a genome using the program sawriter, and then specify" << std::endl
......@@ -745,16 +671,10 @@ const std::string BlasrDiscussion(void)
<< " Several methods may be used to speed up alignments, at the expense of" << std::endl
<< " possibly decreasing sensitivity. " << std::endl
<< " " << std::endl
// << " If the genome is highly repetitive or divergent from the read" << std::endl
// << " sequences, the value of -maxExpand should be increased. This option" << std::endl
// << " controls how much the search for anchors is expanded past a simple" << std::endl
// << " greedy search. A value for -maxExpand of 1 is sufficent for" << std::endl
// << " non-repetitive genomes, and values of -maxExpand greater than 5 are" << std::endl
// << " not recommended." << std::endl
// << " " << std::endl
<< " Regions that are too repetitive may be ignored during mapping by" << std::endl
<< " limiting the number of positions a read maps to with the" << std::endl
<< " -maxAnchorsPerPosition option. Values between 500 and 1000 are effective" << std::endl
<< " -maxAnchorsPerPosition option. Values between 500 and 1000 are effective"
<< std::endl
<< " in the human genome." << std::endl
<< " " << std::endl
<< " For small genomes such as bacterial genomes or BACs, the default parameters "
......@@ -762,5 +682,47 @@ const std::string BlasrDiscussion(void)
<< " are sufficient for maximal sensitivity and good speed." << std::endl
<< std::endl
<< std::endl;
return helpStream.str();
}
// Returns the short usage banner for blasr: the program name, the minimal
// usage line, pointers to -h / --help, and the notice about the
// single-dash/double-dash option convention. Every line, including the two
// blank separator lines, is terminated by '\n'.
const std::string BlasrConciseHelp(void)
{
    // The banner is fixed text, so it is spelled out as one concatenated
    // literal with an explicit newline closing each output line.
    static const char conciseBanner[] =
        "blasr - a program to map reads to a genome\n"
        " usage: blasr reads genome \n"
        " Run with -h for a list of commands \n"
        " --help for verbose discussion of how to run blasr.\n"
        "\n"
        "In release v5.1 of BLASR, command-line options will use the \n"
        "single dash/double dash convention: \n"
        "Character options are preceded by a single dash. (Example: -v) \n"
        "Word options are preceded by a double dash. (Example: --verbose) \n"
        "Please modify your scripts accordingly when BLASR v5.1 is released. \n"
        "\n";
    return std::string(conciseBanner);
}
// Builds the summary help text: the basic usage line, the option-description
// legend, and a short description of each accepted read input format
// (bam, fasta, bax.h5/plx.h5, fofn).
// Returns the text as one string; each paragraph is a single line ending in
// '\n', and the text ends with a blank line.
const std::string BlasrSummaryHelp(void)
{
    std::ostringstream ss;
    ss << " Basic usage: 'blasr reads.{bam|fasta|bax.h5|fofn} genome.fasta [-options] "
       << '\n'
       << " [option]\tDescription (default_value)." << '\n'
       << '\n'
       << " Input Files." << '\n'
       // Adjacent literals are concatenated verbatim, so every fragment that
       // continues a sentence must carry its own separating space.
       << " reads.bam is the NEW native output format for SMRT reads. "
          "This is the preferred input to blasr because rich quality "
          "value (insertion,deletion, and substitution quality values) information is "
          "maintained. The extra quality information improves variant detection and mapping "
          "speed."
       << '\n'
       << " reads.fasta is a multi-fasta file of reads. While any fasta file is valid input, "
          "it is preferable to use bax.h5 or plx.h5 files because they contain "
          "more rich quality value information."
       << '\n'
       << " reads.bax.h5|reads.plx.h5 is the OLD (DEPRECATED) output format of "
          "SMRT reads. "
       << '\n'
       << " reads.fofn File of file names accepted." << '\n'
       << '\n';
    return ss.str();
}
project(
'blasr',
'cpp',
version : '5.3.2',
version : '5.3.3',
default_options : [
'buildtype=release',
'warning_level=3',
......@@ -77,13 +77,13 @@ blasr_config.set('BLASR_VERSION_MAJOR', blasr_major_version)
blasr_config.set('BLASR_VERSION_MINOR', blasr_minor_version)
blasr_config.set('BLASR_VERSION_PATCH', blasr_patch_version)
configure_file(
blasr_BlasrVersion_h = configure_file(
input : 'BlasrVersion.h.in',
output : 'BlasrVersion.h',
configuration : blasr_config)
# replace git commit id
blasr_git_commit_id = vcs_tag(
blasr_BlasrGitHash_h = vcs_tag(
input : 'BlasrGitHash.h.in',
output : 'BlasrGitHash.h',
command : ['git', 'describe', '--always', '--dirty=*'],
......@@ -97,16 +97,17 @@ subdir('extrautils')
subdir('utils')
blasr_static_impl = static_library(
'blasr_impl', [
'blasr_impl',
blasr_sources,
blasr_git_commit_id],
install : false,
dependencies : blasr_deps,
cpp_args : [blasr_warning_flags, '-DUSE_PBBAM=1'])
blasr_main = executable(
'blasr', files([
'Blasr.cpp']),
'blasr', [
blasr_BlasrGitHash_h,
blasr_BlasrVersion_h,
files('Blasr.cpp')],
install : true,
dependencies : blasr_deps,
link_with : blasr_static_impl,
......