Skip to content
Commits on Source (5)
......@@ -47,7 +47,10 @@ endif()
find_package(ZLIB REQUIRED)
find_package(Threads REQUIRED)
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release)
endif()
if(WIN32)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
else()
......@@ -60,7 +63,6 @@ include_directories(
add_executable(diamond src/run/main.cpp
src/basic/config.cpp
src/util/tinythread.cpp
src/basic/score_matrix.cpp
src/blast/blast_filter.cpp
src/blast/blast_seg.cpp
......@@ -95,15 +97,12 @@ add_executable(diamond src/run/main.cpp
src/dp/padded_banded_sw.cpp
src/dp/needleman_wunsch.cpp
src/output/blast_pairwise_format.cpp
src/extra/roc.cpp
src/dp/comp_based_stats.cpp
src/extra/model_sim.cpp
src/run/double_indexed.cpp
src/search/collision.cpp
src/output/sam_format.cpp
src/align/align.cpp
src/search/setup.cpp
src/extra/opt.cpp
src/dp/diag_scores.cpp
src/data/taxonomy.cpp
src/lib/tantan/tantan.cc
......@@ -146,6 +145,10 @@ add_executable(diamond src/run/main.cpp
src/util/algo/greedy_vortex_cover.cpp
src/util/algo/greedy_vortex_cover_weighted.cpp
src/util/sequence/sequence.cpp
src/tools/tsv_record.cpp
src/tools/tools.cpp
src/util/system/getRSS.cpp
src/util/math/sparse_matrix.cpp
)
if(EXTRA)
......@@ -154,6 +157,9 @@ if(EXTRA)
src/extra/benchmark.cpp
src/extra/test.cpp
src/dp/sw_3frame.cpp
src/extra/roc.cpp
src/extra/model_sim.cpp
src/extra/opt.cpp
)
add_definitions(-DEXTRA)
endif()
......
......@@ -21,6 +21,11 @@ Keep posted about new developments by following me on
![image](https://anaconda.org/bioconda/diamond/badges/platforms.svg)
[![image](https://anaconda.org/bioconda/diamond/badges/downloads.svg)](https://anaconda.org/bioconda/diamond)
Support
=======
The preferred support channel is the [Diamond community website](http://www.diamondsearch.org/). It provides a platform for users to exchange their experiences and get support directly from the developer. You may also use the GitHub issue tracker or send inquiries by email.
Quick start guide
=================
......@@ -32,7 +37,7 @@ quick example for setting up and using the program on Linux.
Installing the software on your system may be done by downloading it in
binary format for immediate use:
wget http://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
wget http://github.com/bbuchfink/diamond/releases/download/v0.9.25/diamond-linux64.tar.gz
tar xzf diamond-linux64.tar.gz
The extracted `diamond` binary file should be moved to a directory
......
gcc -c -O3 -DNDEBUG src/blast/sm_blosum45.c src/blast/sm_blosum50.c src/blast/sm_blosum62.c src/blast/sm_blosum80.c src/blast/sm_blosum90.c src/blast/sm_pam30.c src/blast/sm_pam70.c src/blast/sm_pam250.c
g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
g++ -std=gnu++11 -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
sm*.o \
src/run/main.cpp \
src/basic/config.cpp \
src/util/tinythread.cpp \
src/basic/score_matrix.cpp \
src/blast/blast_filter.cpp \
src/blast/blast_seg.cpp \
......@@ -30,15 +29,12 @@ g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
src/dp/padded_banded_sw.cpp \
src/dp/needleman_wunsch.cpp \
src/output/blast_pairwise_format.cpp \
src/extra/roc.cpp \
src/dp/comp_based_stats.cpp \
src/extra/model_sim.cpp \
src/run/double_indexed.cpp \
src/search/collision.cpp \
src/output/sam_format.cpp \
src/align/align.cpp \
src/search/setup.cpp \
src/extra/opt.cpp \
src/dp/diag_scores.cpp \
src/data/taxonomy.cpp \
src/lib/tantan/tantan.cc \
......@@ -81,4 +77,8 @@ g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
src/util/algo/greedy_vortex_cover.cpp \
src/util/algo/greedy_vortex_cover_weighted.cpp \
src/util/sequence/sequence.cpp \
src/tools/tsv_record.cpp \
src/tools/tools.cpp \
src/util/system/getRSS.cpp \
src/util/math/sparse_matrix.cpp \
-lz -lpthread -o diamond
diamond-aligner (0.9.25+dfsg-1) UNRELEASED; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
-- Andreas Tille <tille@debian.org> Wed, 31 Jul 2019 14:57:15 +0200
diamond-aligner (0.9.24+dfsg-1) unstable; urgency=medium
* New upstream version
......
......@@ -3,10 +3,10 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
Build-Depends: debhelper-compat (= 12),
cmake,
zlib1g-dev
Standards-Version: 4.2.1
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/diamond-aligner
Vcs-Git: https://salsa.debian.org/med-team/diamond-aligner.git
Homepage: https://github.com/bbuchfink/diamond
......
[0.9.25]
- Added support for the `sscinames` output field to print subject scientific names to the tabular output format.
- Fixed a compiler error for GCC 8.2.
- Added option `--stop-match-score` to set the match score of stop codons.
- Fixed a bug that caused the `qqual` output field to not be correctly clipped to the aligned part of the query.
- Added output fields `qseq_gapped` and `sseq_gapped` to the tabular format.
- Raised compiler requirement to GCC 4.8.
- Fixed a bug that caused the final sequence positions to not be printed in the pairwise format.
- Allowed using `--min-score` instead of `--top` for the LCA computation of the taxonomy output format.
- Reduced the number of temporary files.
- Added output field `qstrand` to the tabular format.
- Database format version changed to 3.
- Fixed a bug in the `--range-culling` mode that could cause undefined behaviour.
[0.9.24]
- Fixed a compiler error on macOS.
- Added the `--header` option to output a header for the tabular output format.
......
......@@ -137,14 +137,15 @@ void align_queries(Trace_pt_buffer &trace_pts, Consumer* output_file, const Para
timer.go("Computing alignments");
Align_fetcher::init(query_range.first, query_range.second, v->begin(), v->end());
OutputSink::instance = unique_ptr<OutputSink>(new OutputSink(query_range.first, output_file));
Thread_pool threads;
vector<thread> threads;
if (config.verbosity >= 3 && config.load_balancing == Config::query_parallel)
threads.push_back(launch_thread(heartbeat_worker, query_range.second));
threads.emplace_back(heartbeat_worker, query_range.second);
size_t n_threads = config.load_balancing == Config::query_parallel ? (config.threads_align == 0 ? config.threads_ : config.threads_align) : 1;
for (size_t i = 0; i < n_threads; ++i)
//threads.push_back(launch_thread(static_cast<void(*)(size_t)>(&align_worker), i));
threads.push_back(launch_thread(align_worker, i, &params, &metadata));
threads.join_all();
threads.emplace_back(align_worker, i, &params, &metadata);
for (auto &t : threads)
t.join();
timer.finish();
double t = timer.get();
......
......@@ -40,9 +40,9 @@ struct Target : public ::Target
// Computes the query range the subject would occupy if it were laid out
// ungapped along the diagonal of top_hit: the start is clamped at 0, the end
// is clamped at an upper bound derived from query_dna_len, and the two
// positions are converted with TranslatedPosition::absolute_interval using
// the frame of top_hit.
// query_dna_len is forwarded to absolute_interval; going by its name it is
// the untranslated (nucleotide) query length -- confirm against callers.
interval ungapped_query_range(int query_dna_len) const
{
const int i0 = std::max((int)top_hit.query_pos_ - (int)top_hit.subject_pos_, 0),
i1 = std::min((int)top_hit.query_pos_ + (int)subject.length() - (int)top_hit.subject_pos_, query_dna_len / 3);
const Frame f = Frame(top_hit.frame_);
const int i0 = std::max((int)top_hit.query_pos_ - (int)top_hit.subject_pos_, 0),
i1 = std::min((int)top_hit.query_pos_ + (int)subject.length() - (int)top_hit.subject_pos_, f.length(query_dna_len));
return TranslatedPosition::absolute_interval(TranslatedPosition(i0, f), TranslatedPosition(i1, f), query_dna_len);
}
......@@ -198,11 +198,12 @@ void Pipeline::run(Statistics &stat)
const size_t interval_count = (source_query_len + ::Target::INTERVAL - 1) / ::Target::INTERVAL;
for (vector<unsigned> &v : intervals)
v.resize(interval_count);
Thread_pool threads;
vector<thread> threads;
Atomic<size_t> next(0);
for (unsigned i = 0; i < config.threads_; ++i)
threads.push_back(launch_thread(build_ranking_worker, targets.begin(), targets.end(), &next, &intervals[i]));
threads.join_all();
threads.emplace_back(build_ranking_worker, targets.begin(), targets.end(), &next, &intervals[i]);
for (auto &t : threads)
t.join();
timer.go("Merging score ranking intervals");
for (auto it = intervals.begin() + 1; it < intervals.end(); ++it) {
......
......@@ -22,7 +22,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <queue>
#include <vector>
#include <list>
#include "../util/tinythread.h"
#include "../search/trace_pt_buffer.h"
#include "../data/queries.h"
#include "../util/ptr_vector.h"
......
......@@ -24,7 +24,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "sequence.h"
#include "masking.h"
const char* Const::version_string = "0.9.24";
const char* Const::version_string = "0.9.25";
const char* Const::program_name = "diamond";
const char* Const::id_delimiters = " \a\b\f\n\r\t\v\1";
......
......@@ -21,7 +21,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "config.h"
#include "../util/util.h"
#include "../util/log_stream.h"
#include "../util/tinythread.h"
#include "../basic/value.h"
#include "score_matrix.h"
#include "../util/system.h"
......@@ -71,7 +70,9 @@ Config::Config(int argc, const char **argv)
.add_command("smith-waterman", "")
.add_command("protein-snps", "")
.add_command("cluster", "")
.add_command("translate", "");
.add_command("translate", "")
.add_command("filter-blasttab", "")
.add_command("show-cbs", "");
Options_group general("General options");
general.add()
......@@ -116,7 +117,9 @@ Config::Config(int argc, const char **argv)
\tsalltitles means All Subject Title(s), separated by a '<>'\n\
\tqcovhsp means Query Coverage Per HSP\n\
\tqtitle means Query title\n\
\tqqual means Query quality values\n\
\tqqual means Query quality values for the aligned part of the query\n\
\tfull_qqual means Query quality values\n\
\tqstrand means Query strand\n\
\n\tDefault: qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore", output_format)
("verbose", 'v', "verbose console output", verbose)
("log", 0, "enable debug log", debug_log)
......@@ -167,6 +170,7 @@ Config::Config(int argc, const char **argv)
("no-self-hits", 0, "suppress reporting of identical self hits", no_self_hits)
("taxonmap", 0, "protein accession to taxid mapping file", prot_accession2taxid)
("taxonnodes", 0, "taxonomy nodes.dmp from NCBI", nodesdmp)
("taxonnames", 0, "taxonomy names.dmp from NCBI", namesdmp)
("taxonlist", 0, "restrict search to list of taxon ids (comma-separated)", taxonlist);
Options_group advanced("Advanced options");
......@@ -192,7 +196,8 @@ Config::Config(int argc, const char **argv)
("range-cover", 0, "percentage of query range to be covered for hit culling (default=50)", query_range_cover, 50.0)
("dbsize", 0, "effective database size (in letters)", db_size)
("no-auto-append", 0, "disable auto appending of DAA and DMND file extensions", no_auto_append)
("xml-blord-format", 0, "Use gnl|BL_ORD_ID| style format in XML output", xml_blord_format);
("xml-blord-format", 0, "Use gnl|BL_ORD_ID| style format in XML output", xml_blord_format)
("stop-match-score", 0, "Set the match score of stop codons against each other.", stop_match_score, 1);
Options_group view_options("View options");
view_options.add()
......@@ -266,7 +271,13 @@ Config::Config(int argc, const char **argv)
("use-dataset-field", 0, "", use_dataset_field)
("store-query-quality", 0, "", store_query_quality)
("swipe-chunk-size", 0, "", swipe_chunk_size, 256u)
("query-parallel-limit", 0, "", query_parallel_limit, 1000000u);
("query-parallel-limit", 0, "", query_parallel_limit, 1000000u)
("hard-masked", 0, "", hardmasked)
("cbs-window", 0, "", cbs_window, 40)
("tantan-r", 0, "", tantan_r, 0.005)
("tantan-s", 0, "", tantan_s, 0.5)
("no-unlink", 0, "", no_unlink)
("no-dict", 0, "", no_dict);
parser.add(general).add(makedb).add(aligner).add(advanced).add(view_options).add(getseq_options).add(hidden_options);
parser.store(argc, argv, command);
......@@ -361,13 +372,15 @@ Config::Config(int argc, const char **argv)
auto_append_extension(output_file, ".gz");
}
message_stream << Const::program_name << " v" << Const::version_string << "." << (unsigned)Const::build_version << " | by Benjamin Buchfink <buchfink@gmail.com>" << endl;
message_stream << "Licensed under the GNU GPL <https://www.gnu.org/licenses/gpl.txt>" << endl;
message_stream << "Check http://github.com/bbuchfink/diamond for updates." << endl << endl;
ostream &header_out = command == Config::help ? cout : cerr;
header_out << Const::program_name << " v" << Const::version_string << "." << (unsigned)Const::build_version << " | by Benjamin Buchfink <buchfink@gmail.com>" << endl;
header_out << "Licensed under the GNU GPL <https://www.gnu.org/licenses/gpl.txt>" << endl;
header_out << "Check http://github.com/bbuchfink/diamond for updates." << endl << endl;
log_stream << Const::program_name << " v" << Const::version_string << "." << (unsigned)Const::build_version << endl;
#ifndef NDEBUG
verbose_stream << "Assertions enabled." << endl;
#endif
set_option(threads_, tthread::thread::hardware_concurrency());
set_option(threads_, std::thread::hardware_concurrency());
switch (command) {
case Config::makedb:
......@@ -394,7 +407,7 @@ Config::Config(int argc, const char **argv)
if (query_range_culling && frame_shift == 0)
throw std::runtime_error("Query range culling is only supported in frameshift alignment mode (option -F).");
if (matrix_file == "")
score_matrix = Score_matrix(to_upper_case(matrix), gap_open, gap_extend, frame_shift);
score_matrix = Score_matrix(to_upper_case(matrix), gap_open, gap_extend, frame_shift, stop_match_score);
else {
if (lambda == 0 || K == 0)
throw std::runtime_error("Custom scoring matrices require setting the --lambda and --K options.");
......
......@@ -171,11 +171,19 @@ struct Config
bool output_header;
string alfmt;
string unfmt;
string namesdmp;
bool hardmasked;
int cbs_window;
double tantan_r;
double tantan_s;
bool no_unlink;
bool no_dict;
int stop_match_score;
enum {
makedb = 0, blastp = 1, blastx = 2, view = 3, help = 4, version = 5, getseq = 6, benchmark = 7, random_seqs = 8, compare = 9, sort = 10, roc = 11, db_stat = 12, model_sim = 13,
match_file_stat = 14, model_seqs = 15, opt = 16, mask = 17, fastq2fasta = 18, dbinfo = 19, test_extra = 20, test_io = 21, db_annot_stats = 22, read_sim = 23, info = 24, seed_stat = 25,
smith_waterman = 26, protein_snps = 27, cluster = 28, translate = 29
smith_waterman = 26, protein_snps = 27, cluster = 28, translate = 29, filter_blasttab = 30, show_cbs = 31
};
unsigned command;
......
......@@ -23,7 +23,7 @@ struct Const
{
enum {
build_version = 125,
build_version = 126,
seedp_bits = 10,
seedp = 1<<seedp_bits,
max_seed_weight = 32,
......
......@@ -46,20 +46,20 @@ void Masking::operator()(Letter *seq, size_t len) const
{
tantan::maskSequences((tantan::uchar*)seq, (tantan::uchar*)(seq + len), 50,
(tantan::const_double_ptr*)probMatrixPointers_,
0.005, 0.05,
config.tantan_r, 0.05,
0.9,
0, 0,
0.5, (const tantan::uchar*)mask_table_x_);
config.tantan_s, (const tantan::uchar*)mask_table_x_);
}
// Runs tantan::maskSequences over the len letters starting at seq, handing it
// this object's probability matrix pointers and mask_table_bit_ as the final
// (mask table) argument. The sequence is passed through a non-const pointer
// and is presumably modified in place by tantan -- confirm in the tantan
// sources.
void Masking::mask_bit(Letter *seq, size_t len) const
{
tantan::maskSequences((tantan::uchar*)seq, (tantan::uchar*)(seq + len), 50,
(tantan::const_double_ptr*)probMatrixPointers_,
0.005, 0.05,
config.tantan_r, 0.05,
0.9,
0, 0,
0.5, (const tantan::uchar*)mask_table_bit_);
config.tantan_s, (const tantan::uchar*)mask_table_bit_);
}
void Masking::bit_to_hard_mask(Letter *seq, size_t len, size_t &n) const
......@@ -90,9 +90,10 @@ void mask_worker(Atomic<size_t> *next, Sequence_set *seqs, const Masking *maskin
// Masks a sequence set using config.threads_ worker threads.
// Every worker runs mask_worker and receives the same arguments: a pointer to
// one shared counter (`next`, initialised to 0), the sequence set, the masking
// object and the hard_mask flag. The function returns only after all workers
// have been joined.
void mask_seqs(Sequence_set &seqs, const Masking &masking, bool hard_mask)
{
Thread_pool threads;
vector<thread> threads;
Atomic<size_t> next(0);
for (size_t i = 0; i < config.threads_; ++i)
threads.push_back(launch_thread(mask_worker, &next, &seqs, &masking, hard_mask));
threads.join_all();
threads.emplace_back(mask_worker, &next, &seqs, &masking, hard_mask);
for (auto &t : threads)
t.join();
}
\ No newline at end of file
......@@ -212,18 +212,18 @@ const Matrix_info Matrix_info::matrices[] = {
{ "DNA", dna_values, (const char*)DNA_scores, 1, 4, 2 }
};
// Constructs a scoring matrix from a named entry looked up via
// Matrix_info::get(matrix).
//   gap_open / gap_extend - a value of -1 selects the matrix's default penalty.
//   frameshift            - stored as frame_shift_.
//   db_letters            - stored (as double) in db_letters_.
// The score tables (8-bit, biased 8-bit, 16-bit, 32-bit) are all built from
// the same Matrix_info scores; bias_ is the negated low_score() and is passed
// only to the matrix8u_ table.
Score_matrix::Score_matrix(const string & matrix, int gap_open, int gap_extend, int frameshift, uint64_t db_letters):
Score_matrix::Score_matrix(const string & matrix, int gap_open, int gap_extend, int frameshift, int stop_match_score, uint64_t db_letters):
gap_open_ (gap_open == -1 ? Matrix_info::get(matrix).default_gap_open : gap_open),
gap_extend_ (gap_extend == -1 ? Matrix_info::get(matrix).default_gap_extend : gap_extend),
frame_shift_(frameshift),
db_letters_ ((double)db_letters),
constants_ (Matrix_info::get(matrix).get_constants(gap_open_, gap_extend_)),
name_(matrix),
matrix8_(Matrix_info::get(matrix).scores),
matrix8_(Matrix_info::get(matrix).scores, stop_match_score),
bias_((char)(-low_score())),
matrix8u_(Matrix_info::get(matrix).scores, bias_),
matrix16_(Matrix_info::get(matrix).scores),
matrix32_(Matrix_info::get(matrix).scores)
matrix8u_(Matrix_info::get(matrix).scores, stop_match_score, bias_),
matrix16_(Matrix_info::get(matrix).scores, stop_match_score),
matrix32_(Matrix_info::get(matrix).scores, stop_match_score)
{ }
char Score_matrix::low_score() const
......
......@@ -36,7 +36,7 @@ struct Score_matrix
{
Score_matrix() {}
Score_matrix(const string &matrix, int gap_open, int gap_extend, int frame_shift, uint64_t db_letters = 0);
Score_matrix(const string &matrix, int gap_open, int gap_extend, int frame_shift, int stop_match_score, uint64_t db_letters = 0);
Score_matrix(const string &matrix_file, double lambda, double K, int gap_open, int gap_extend, uint64_t db_letters = 0);
friend std::ostream& operator<<(std::ostream& s, const Score_matrix &m);
......@@ -137,12 +137,14 @@ private:
struct Scores
{
Scores() {}
Scores(const char *scores, char bias = 0)
// Fills the 32x32 score table from a row-major n x n matrix, where n is
// value_traits.alphabet_size.
//   scores           - source matrix, read as scores[i * n + j].
//   stop_match_score - replacement score for cell (24, 24); the default of 1
//                      leaves that cell as read from `scores`.
//   bias             - constant added to every stored score.
// Cells outside the n x n range are padded with -(max(_t) / 2).
Scores(const char *scores, int stop_match_score = 1, char bias = 0)
{
	const unsigned n = value_traits.alphabet_size;
	// NOTE(review): 24 is presumably the index of the stop codon letter in the
	// amino acid alphabet -- confirm against the alphabet definition.
	const unsigned stop_letter = 24;
	for (unsigned i = 0; i < 32; ++i)
		for (unsigned j = 0; j < 32; ++j)
			data[i * 32 + j] = i < n && j < n ? (_t)(scores[i*n + j] + (int)bias) : -(std::numeric_limits<_t>::max() / 2);
	// The override has to carry the same bias as every other cell; otherwise a
	// biased table would hold one unbiased entry.
	if (stop_match_score != 1)
		data[stop_letter * 32 + stop_letter] = (_t)(stop_match_score + (int)bias);
}
#ifdef _MSC_VER
__declspec(align(16)) _t data[32 * 32];
......
......@@ -21,6 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <iostream>
#include <vector>
#include <string>
#include "../basic/value.h"
#include "../util/binary_buffer.h"
#include "../util/text_buffer.h"
......@@ -32,6 +33,7 @@ struct sequence
{
static const char DELIMITER = '\x1f';
struct Reversed {};
struct Hardmasked {};
sequence():
len_ (0),
clipping_offset_ (0),
......@@ -125,6 +127,17 @@ struct sequence
}
return os;
}
// Writes the sequence to `os` as text using the alphabet of `v`. Any letter
// with bit 7 (value 128) set is written as the alphabet's mask character
// instead of its own symbol. Returns `os` to allow chaining.
TextBuffer& print(TextBuffer &os, const Value_traits &v, Hardmasked) const
{
	for (unsigned pos = 0; pos < len_; ++pos) {
		const long letter = (long)data_[pos];
		const bool masked = (letter & 128) != 0;
		os << v.alphabet[masked ? (long)v.mask_char : letter];
	}
	return os;
}
TextBuffer& print(TextBuffer &os, const Value_traits &v, Reversed) const
{
for (int i = (int)len_ - 1; i >= 0; --i)
......@@ -135,6 +148,9 @@ struct sequence
{
return sequence(*this, begin, end - 1);
}
// Copies the raw letters in the half-open range [begin, end) into a
// std::string. No bounds checking is performed.
std::string substr(int begin, int end) const {
	const auto *first = &data_[begin];
	const auto *last = &data_[end];
	return std::string(first, last);
}
friend TextBuffer& operator<<(TextBuffer &buf, const sequence &s)
{
return s.print(buf, value_traits);
......
......@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <algorithm>
#include <stdint.h>
#include <string.h>
#include "../util/tinythread.h"
#include <mutex>
#include "../util/log_stream.h"
#include "../util/memory/memory_pool.h"
......@@ -101,7 +101,7 @@ struct Statistics
}
stat_type data_[COUNT];
tthread::mutex mtx_;
std::mutex mtx_;
};
......
......@@ -19,6 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef TRANSLATED_POSITION_H_
#define TRANSLATED_POSITION_H_
#include <algorithm>
#include "../util/interval.h"
enum Strand { FORWARD = 0, REVERSE = 1 };
......@@ -43,6 +44,10 @@ struct Frame
{
return (offset + 1) * (strand == FORWARD ? 1 : -1);
}
// Returns (dna_len - offset) / 3, or 0 if that value would be negative.
int length(int dna_len) const
{
	const int triplets = (dna_len - offset) / 3;
	return triplets < 0 ? 0 : triplets;
}
int offset;
Strand strand;
};
......