Skip to content
Commits on Source (3)
......@@ -2,6 +2,7 @@ cmake_minimum_required (VERSION 2.6)
project (DIAMOND)
option(BUILD_STATIC "BUILD_STATIC" OFF)
option(EXTRA "EXTRA" OFF)
if(BUILD_STATIC)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
......@@ -32,7 +33,6 @@ include_directories(
add_executable(diamond src/run/main.cpp
src/basic/config.cpp
src/util/tinythread.cpp
src/util/compressed_stream.cpp
src/basic/score_matrix.cpp
src/blast/blast_filter.cpp
src/blast/blast_seg.cpp
......@@ -84,7 +84,6 @@ add_executable(diamond src/run/main.cpp
src/dp/banded_sw.cpp
src/data/sorted_list.cpp
src/data/seed_set.cpp
src/util/binary_file.cpp
src/util/simd.cpp
src/output/taxon_format.cpp
src/output/view.cpp
......@@ -95,8 +94,33 @@ add_executable(diamond src/run/main.cpp
src/dp/swipe/banded_3frame_swipe.cpp
src/dp/swipe/banded_swipe.cpp
src/align/banded_swipe_pipeline.cpp
src/data/ref_dictionary.cpp
src/util/io/compressed_stream.cpp
src/util/io/deserializer.cpp
src/util/io/file_sink.cpp
src/util/io/file_source.cpp
src/util/io/input_file.cpp
src/util/io/input_stream_buffer.cpp
src/util/io/output_file.cpp
src/util/io/output_stream_buffer.cpp
src/util/io/serializer.cpp
src/util/io/temp_file.cpp
src/util/io/text_input_file.cpp
src/data/taxon_list.cpp
src/data/taxonomy_nodes.cpp
src/util/algo/MurmurHash3.cpp
)
if(EXTRA)
target_sources(diamond
PUBLIC
src/extra/benchmark.cpp
src/extra/test.cpp
src/dp/sw_3frame.cpp
)
add_definitions(-DEXTRA)
endif()
target_link_libraries(diamond ${ZLIB_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS diamond DESTINATION bin)
......@@ -9,16 +9,14 @@ DIAMOND is a sequence aligner for protein and translated DNA searches, designed
Follow me on Twitter to get notified of updates.
.. image:: https://img.shields.io/twitter/url/http/shields.io.svg?style=social&label=%40bbuchfink
.. image:: https://image.ibb.co/gAmVKR/twitter1.png
:target: https://twitter.com/bbuchfink
.. image:: https://badges.gitter.im/diamond-aligner/Lobby.svg
:alt: Join the chat at https://gitter.im/diamond-aligner/Lobby
:target: https://gitter.im/diamond-aligner/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
.. image:: https://img.shields.io/github/downloads/bbuchfink/diamond/total.svg?label=Downloads
:target: https://github.com/bbuchfink/diamond/releases/download/v0.9.14/diamond-linux64.tar.gz
.. image:: https://img.shields.io/conda/dn/bioconda/diamond.svg?label=BioConda%20install
.. image:: https://anaconda.org/bioconda/diamond/badges/downloads.svg
:target: https://anaconda.org/bioconda/diamond
.. image:: https://img.shields.io/badge/Google%20Scholar-446-blue.svg
.. image:: https://img.shields.io/badge/Google%20Scholar-491-blue.svg
:target: https://scholar.google.de/citations?user=kjPIF1cAAAAJ
Quick start guide
......@@ -27,7 +25,7 @@ Please read the `manual <https://github.com/bbuchfink/diamond/raw/master/diamond
Installing the software on your system may be done by downloading it in binary format for immediate use::
wget http://github.com/bbuchfink/diamond/releases/download/v0.9.17/diamond-linux64.tar.gz
wget http://github.com/bbuchfink/diamond/releases/download/v0.9.19/diamond-linux64.tar.gz
tar xzf diamond-linux64.tar.gz
The extracted ``diamond`` binary file should be moved to a directory contained in your executable search path (PATH environment variable).
......@@ -55,5 +53,3 @@ About
DIAMOND is developed by Benjamin Buchfink. Feel free to contact me for support (`Email <mailto:buchfink@gmail.com>`_ `Twitter <http://twitter.com/bbuchfink>`_).
If you use DIAMOND in published research, please cite B. Buchfink, Xie C., D. Huson, "Fast and sensitive protein alignment using DIAMOND", Nature Methods 12, 59-60 (2015).
.. image:: https://ga-beacon.appspot.com/UA-109667396-1/diamond?pixel
......@@ -4,7 +4,6 @@ g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
src/run/main.cpp \
src/basic/config.cpp \
src/util/tinythread.cpp \
src/util/compressed_stream.cpp \
src/basic/score_matrix.cpp \
src/blast/blast_filter.cpp \
src/blast/blast_seg.cpp \
......@@ -48,7 +47,6 @@ g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
src/dp/banded_sw.cpp \
src/data/sorted_list.cpp \
src/data/seed_set.cpp \
src/util/binary_file.cpp \
src/util/simd.cpp \
src/output/taxon_format.cpp \
src/output/view.cpp \
......@@ -59,4 +57,19 @@ g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
src/dp/swipe/banded_3frame_swipe.cpp \
src/dp/swipe/banded_swipe.cpp \
src/align/banded_swipe_pipeline.cpp \
src/data/ref_dictionary.cpp \
src/util/io/compressed_stream.cpp \
src/util/io/deserializer.cpp \
src/util/io/file_sink.cpp \
src/util/io/file_source.cpp \
src/util/io/input_file.cpp \
src/util/io/input_stream_buffer.cpp \
src/util/io/output_file.cpp \
src/util/io/output_stream_buffer.cpp \
src/util/io/serializer.cpp \
src/util/io/temp_file.cpp \
src/util/io/text_input_file.cpp \
src/data/taxon_list.cpp \
src/data/taxonomy_nodes.cpp \
src/util/algo/MurmurHash3.cpp \
-lz -lpthread -o diamond
diamond-aligner (0.9.19+dfsg-1) unstable; urgency=medium
* New upstream version
-- Andreas Tille <tille@debian.org> Thu, 29 Mar 2018 09:37:31 +0200
diamond-aligner (0.9.17+dfsg-1) unstable; urgency=medium
* New upstream version
......
[0.9.19]
- Fixed a bug in the --un function to write unaligned reads.
- Fixed an issue in the LCA algorithm that could cause an assignment to a higher node.
- --taxonmap and --taxonnodes parameters now apply exclusively to the makedb command.
- Added option --taxonlist to filter searches by subject taxonomic id.
- Changed database format; rebuilding is required.
[0.9.18]
- Optimized output writing performance.
- Fixed a bug in the XML output format.
[0.9.17]
- Fixed a compiler error on FreeBSD.
- Added option --range-culling.
......
......@@ -62,7 +62,7 @@ auto_ptr<Queue> Align_fetcher::queue_;
vector<hit>::iterator Align_fetcher::it_;
vector<hit>::iterator Align_fetcher::end_;
void align_worker(size_t thread_id)
void align_worker(size_t thread_id, const Parameters *params, const Metadata *metadata)
{
Align_fetcher hits;
Statistics stat;
......@@ -74,7 +74,7 @@ void align_worker(size_t thread_id)
buf = new TextBuffer;
const char *query_title = query_ids::get()[hits.query].c_str();
output_format->print_query_intro(hits.query, query_title, get_source_query_len((unsigned)hits.query), *buf, true);
output_format->print_query_epilog(*buf, query_title, true);
output_format->print_query_epilog(*buf, query_title, true, *params);
}
OutputSink::get().push(hits.query, buf);
continue;
......@@ -82,18 +82,18 @@ void align_worker(size_t thread_id)
QueryMapper *mapper;
if (config.ext == Config::swipe)
mapper = new ExtensionPipeline::Swipe::Pipeline(hits.query, hits.begin, hits.end);
mapper = new ExtensionPipeline::Swipe::Pipeline(*params, hits.query, hits.begin, hits.end);
else if (config.frame_shift != 0)
mapper = new ExtensionPipeline::BandedSwipe::Pipeline(hits.query, hits.begin, hits.end, dp_stat);
mapper = new ExtensionPipeline::BandedSwipe::Pipeline(*params, hits.query, hits.begin, hits.end, dp_stat);
else
mapper = new ExtensionPipeline::Greedy::Pipeline(hits.query, hits.begin, hits.end);
mapper = new ExtensionPipeline::Greedy::Pipeline(*params, hits.query, hits.begin, hits.end);
mapper->init();
mapper->run(stat);
TextBuffer *buf = 0;
if (*output_format != Output_format::null) {
buf = new TextBuffer;
const bool aligned = mapper->generate_output(*buf, stat);
const bool aligned = mapper->generate_output(*buf, stat, *metadata);
if (aligned && !config.unaligned.empty())
query_aligned[hits.query] = true;
}
......@@ -104,7 +104,7 @@ void align_worker(size_t thread_id)
::dp_stat += dp_stat;
}
void align_queries(Trace_pt_buffer &trace_pts, Output_stream* output_file)
void align_queries(Trace_pt_buffer &trace_pts, OutputFile* output_file, const Parameters &params, const Metadata &metadata)
{
const size_t max_size = (size_t)std::min(config.chunk_size*1e9 * 9 * 2 / config.lowmem, 2e9);
pair<size_t, size_t> query_range;
......@@ -127,7 +127,8 @@ void align_queries(Trace_pt_buffer &trace_pts, Output_stream* output_file)
threads.push_back(launch_thread(heartbeat_worker, query_range.second));
size_t n_threads = (config.threads_align == 0 ? config.threads_ : config.threads_align);
for (size_t i = 0; i < n_threads; ++i)
threads.push_back(launch_thread(static_cast<void(*)(size_t)>(&align_worker), i));
//threads.push_back(launch_thread(static_cast<void(*)(size_t)>(&align_worker), i));
threads.push_back(launch_thread(align_worker, i, &params, &metadata));
threads.join_all();
timer.finish();
......
......@@ -26,13 +26,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "../util/task_queue.h"
#include "../basic/statistics.h"
#include "query_mapper.h"
#include "../data/metadata.h"
using std::vector;
using std::auto_ptr;
struct Output_writer
{
Output_writer(Output_stream* f) :
Output_writer(OutputFile* f) :
f_(f)
{ }
void operator()(TextBuffer &buf)
......@@ -41,13 +42,13 @@ struct Output_writer
buf.clear();
}
private:
Output_stream* const f_;
OutputFile* const f_;
};
template<typename _buffer>
struct Ring_buffer_sink
{
Ring_buffer_sink(Output_stream *output_file):
Ring_buffer_sink(OutputFile *output_file):
writer(output_file),
queue(config.threads_ * 4, writer)
{}
......@@ -64,15 +65,15 @@ private:
Task_queue<_buffer, Output_writer> queue;
};
void align_queries(Trace_pt_buffer &trace_pts, Output_stream* output_file);
void align_queries(Trace_pt_buffer &trace_pts, OutputFile* output_file, const Parameters &params, const Metadata &metadata);
namespace ExtensionPipeline {
namespace Greedy {
struct Target;
struct Pipeline : public QueryMapper
{
Pipeline(size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
QueryMapper(query_id, begin, end)
Pipeline(const Parameters &params, size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
QueryMapper(params, query_id, begin, end)
{}
Target& target(size_t i);
virtual void run(Statistics &stat);
......@@ -82,8 +83,8 @@ namespace ExtensionPipeline {
namespace Swipe {
struct Pipeline : public QueryMapper
{
Pipeline(size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
QueryMapper(query_id, begin, end)
Pipeline(const Parameters &params, size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
QueryMapper(params, query_id, begin, end)
{}
virtual void run(Statistics &stat);
virtual ~Pipeline() {}
......@@ -93,8 +94,8 @@ namespace ExtensionPipeline {
struct Target;
struct Pipeline : public QueryMapper
{
Pipeline(size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end, DpStat &dp_stat) :
QueryMapper(query_id, begin, end),
Pipeline(const Parameters &params, size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end, DpStat &dp_stat) :
QueryMapper(params, query_id, begin, end),
dp_stat(dp_stat)
{}
Target& target(size_t i);
......
......@@ -118,7 +118,7 @@ void Pipeline::range_ranking()
const double rr = config.rank_ratio == -1 ? 0.4 : config.rank_ratio;
std::stable_sort(targets.begin(), targets.end(), Target::compare);
IntervalPartition ip((int)std::min(config.max_alignments, (uint64_t)INT_MAX));
for (Ptr_vector< ::Target>::iterator i = targets.begin(); i < targets.end();) {
for (PtrVector< ::Target>::iterator i = targets.begin(); i < targets.end();) {
Target* t = ((Target*)*i);
if (t->is_outranked(ip, source_query_len, rr)) {
if (config.benchmark_ranking) {
......
......@@ -40,7 +40,7 @@ bool Target::is_enveloped(const Target &t, double p) const
return true;
}
bool Target::is_enveloped(Ptr_vector<Target>::const_iterator begin, Ptr_vector<Target>::const_iterator end, double p, int min_score) const
bool Target::is_enveloped(PtrVector<Target>::const_iterator begin, PtrVector<Target>::const_iterator end, double p, int min_score) const
{
for (; begin != end; ++begin)
if (is_enveloped(**begin, p) && (*begin)->filter_score >= min_score)
......@@ -50,10 +50,11 @@ bool Target::is_enveloped(Ptr_vector<Target>::const_iterator begin, Ptr_vector<T
int QueryMapper::raw_score_cutoff() const
{
return score_matrix.rawscore(config.min_bit_score == 0 ? score_matrix.bitscore(config.max_evalue, ref_header.letters, (unsigned)query_seq(0).length()) : config.min_bit_score);
return score_matrix.rawscore(config.min_bit_score == 0 ? score_matrix.bitscore(config.max_evalue, (unsigned)query_seq(0).length()) : config.min_bit_score);
}
QueryMapper::QueryMapper(size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
QueryMapper::QueryMapper(const Parameters &params, size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
parameters(params),
source_hits(std::make_pair(begin, end)),
query_id((unsigned)query_id),
targets_finished(0),
......@@ -156,9 +157,9 @@ void QueryMapper::score_only_culling()
std::stable_sort(targets.begin(), targets.end(), Target::compare);
auto_ptr<TargetCulling> target_culling(TargetCulling::get());
const unsigned query_len = (unsigned)query_seq(0).length();
Ptr_vector<Target>::iterator i;
PtrVector<Target>::iterator i;
for (i = targets.begin(); i<targets.end();) {
if ((config.min_bit_score == 0 && score_matrix.evalue((*i)->filter_score, config.db_size, query_len) > config.max_evalue)
if ((config.min_bit_score == 0 && score_matrix.evalue((*i)->filter_score, query_len) > config.max_evalue)
|| score_matrix.bitscore((*i)->filter_score) < config.min_bit_score)
break;
const int c = target_culling->cull(**i);
......@@ -178,7 +179,7 @@ void QueryMapper::score_only_culling()
targets.erase(i, targets.end());
}
bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat)
bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat, const Metadata &metadata)
{
std::stable_sort(targets.begin(), targets.end(), Target::compare);
......@@ -190,7 +191,7 @@ bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat)
auto_ptr<Output_format> f(output_format->clone());
for (size_t i = 0; i < targets.size(); ++i) {
if ((config.min_bit_score == 0 && score_matrix.evalue(targets[i].filter_score, config.db_size, query_len) > config.max_evalue)
if ((config.min_bit_score == 0 && score_matrix.evalue(targets[i].filter_score, query_len) > config.max_evalue)
|| score_matrix.bitscore(targets[i].filter_score) < config.min_bit_score)
break;
......@@ -208,7 +209,6 @@ bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat)
if (targets[i].outranked)
stat.inc(Statistics::OUTRANKED_HITS);
++n_target_seq;
target_culling->add(targets[i]);
hit_hsps = 0;
......@@ -236,16 +236,17 @@ bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat)
translated_query,
query_title,
targets[i].subject_id,
targets[i].subject_id,
ReferenceDictionary::get().block_to_database_id(targets[i].subject_id),
ref_title,
subject_len,
n_target_seq,
hit_hsps), buffer);
hit_hsps), metadata, buffer);
}
++n_hsp;
++hit_hsps;
}
++n_target_seq;
}
if (n_hsp > 0) {
......@@ -253,14 +254,14 @@ bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat)
if (*f == Output_format::daa)
finish_daa_query_record(buffer, seek_pos);
else
f->print_query_epilog(buffer, query_title, false);
f->print_query_epilog(buffer, query_title, false, parameters);
}
else
IntermediateRecord::finish_query(buffer, seek_pos);
}
else if (!blocked_processing && *f != Output_format::daa && config.report_unaligned != 0) {
f->print_query_intro(query_id, query_title, source_query_len, buffer, true);
f->print_query_epilog(buffer, query_title, true);
f->print_query_epilog(buffer, query_title, true, parameters);
}
if (!blocked_processing) {
......
......@@ -28,6 +28,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "../util/ptr_vector.h"
#include "../dp/dp.h"
#include "../data/reference.h"
#include "../basic/parameters.h"
#include "../data/metadata.h"
using std::vector;
using std::pair;
......@@ -56,7 +58,7 @@ struct Target
}
bool envelopes(const Hsp_traits &t, double p) const;
bool is_enveloped(const Target &t, double p) const;
bool is_enveloped(Ptr_vector<Target>::const_iterator begin, Ptr_vector<Target>::const_iterator end, double p, int min_score) const;
bool is_enveloped(PtrVector<Target>::const_iterator begin, PtrVector<Target>::const_iterator end, double p, int min_score) const;
void inner_culling(int cutoff);
void apply_filters(int dna_len, int subject_len, const char *query_title, const char *ref_title);
unsigned subject_id;
......@@ -72,9 +74,9 @@ struct Target
struct QueryMapper
{
QueryMapper(size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end);
QueryMapper(const Parameters &params, size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end);
void init();
bool generate_output(TextBuffer &buffer, Statistics &stat);
bool generate_output(TextBuffer &buffer, Statistics &stat, const Metadata &metadata);
void rank_targets(double ratio, double factor);
void score_only_culling();
int raw_score_cutoff() const;
......@@ -97,10 +99,12 @@ struct QueryMapper
}
virtual void run(Statistics &stat) = 0;
virtual ~QueryMapper() {}
const Parameters &parameters;
pair<Trace_pt_list::iterator, Trace_pt_list::iterator> source_hits;
unsigned query_id, targets_finished, next_target;
unsigned source_query_len, unaligned_from;
Ptr_vector<Target> targets;
PtrVector<Target> targets;
vector<Seed_hit> seed_hits;
vector<Bias_correction> query_cb;
vector<Long_score_profile> profile;
......
......@@ -24,7 +24,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "sequence.h"
#include "masking.h"
const char* Const::version_string = "0.9.17";
const char* Const::version_string = "0.9.19";
const char* Const::program_name = "diamond";
const char* Const::id_delimiters = " \a\b\f\n\r\t\v\1";
......
......@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "../util/system.h"
#include "reduction.h"
#include "shape_config.h"
#include "../util/temp_file.h"
#include "../util/io/temp_file.h"
#include "../basic/match.h"
#include "../data/sorted_list.h"
#include "../basic/translate.h"
......@@ -58,7 +58,10 @@ Config::Config(int argc, const char **argv)
.add_command("mask", "")
.add_command("fastq2fasta", "")
.add_command("dbinfo", "Print information about a DIAMOND database file")
.add_command("test-extra", "");
.add_command("test-extra", "")
.add_command("test-io", "")
.add_command("db-annot-stats", "")
.add_command("read-sim", "");
Options_group general("General options");
general.add()
......@@ -145,7 +148,8 @@ Config::Config(int argc, const char **argv)
("sallseqid", 0, "include all subject ids in DAA file", sallseqid)
("no-self-hits", 0, "suppress reporting of identical self hits", no_self_hits)
("taxonmap", 0, "protein accession to taxid mapping file", prot_accession2taxid)
("taxonnodes", 0, "taxonomy nodes.dmp from NCBI", nodesdmp);
("taxonnodes", 0, "taxonomy nodes.dmp from NCBI", nodesdmp)
("taxonlist", 0, "restrict search to list of taxon ids (comma-separated)", taxonlist);
Options_group advanced("Advanced options");
advanced.add()
......@@ -229,7 +233,8 @@ Config::Config(int argc, const char **argv)
("hashed-seeds", 0, "", hashed_seeds)
("rank-factor", 0, "", rank_factor, -1.0)
("transcript-len-estimate", 0, "", transcript_len_estimate, 1.0)
("no-traceback", 0, "", disable_traceback);
("no-traceback", 0, "", disable_traceback)
("family-counts", 0, "", family_counts_file);
parser.add(general).add(makedb).add(aligner).add(advanced).add(view_options).add(getseq_options).add(hidden_options);
parser.store(argc, argv, command);
......@@ -345,7 +350,7 @@ Config::Config(int argc, const char **argv)
if (query_range_culling && frame_shift == 0)
throw std::runtime_error("Query range culling is only supported in frameshift alignment mode (option -F).");
if (matrix_file == "")
score_matrix = Score_matrix(to_upper_case(matrix), gap_open, gap_extend, reward, penalty, frame_shift);
score_matrix = Score_matrix(to_upper_case(matrix), gap_open, gap_extend, frame_shift);
else {
if (lambda == 0 || K == 0)
throw std::runtime_error("Custom scoring matrices require setting the --lambda and --K options.");
......
......@@ -148,10 +148,12 @@ struct Config
bool query_range_culling;
double query_range_cover;
double transcript_len_estimate;
string family_counts_file;
string taxonlist;
enum {
makedb = 0, blastp = 1, blastx = 2, view = 3, help = 4, version = 5, getseq = 6, benchmark = 7, random_seqs = 8, compare = 9, sort = 10, roc = 11, db_stat = 12, model_sim = 13,
match_file_stat = 14, model_seqs = 15, opt = 16, mask = 17, fastq2fasta=18, dbinfo=19, test_extra = 20
match_file_stat = 14, model_seqs = 15, opt = 16, mask = 17, fastq2fasta=18, dbinfo=19, test_extra = 20, test_io = 21, db_annot_stats = 22, read_sim = 23
};
unsigned command;
......
......@@ -23,7 +23,7 @@ struct Const
{
enum {
build_version = 118,
build_version = 120,
seedp_bits = 10,
seedp = 1<<seedp_bits,
max_seed_weight = 32,
......
......@@ -232,3 +232,15 @@ void Hsp::push_match(Letter q, Letter s, bool positive)
}
++length;
}
void Hsp::push_gap(Edit_operation op, int length, const char *subject)
{
++gap_openings;
this->length += length;
gaps += length;
if (op == op_insertion)
transcript.push_back(op_insertion, (unsigned)length);
else
for (int i = 0; i < length; ++i)
transcript.push_back(op_deletion, subject[-i]);
}
\ No newline at end of file
......@@ -196,6 +196,7 @@ struct Hsp
bool is_weakly_enveloped_by(std::list<Hsp>::const_iterator begin, std::list<Hsp>::const_iterator end, int cutoff) const;
void push_back(const DiagonalSegment &d, const TranslatedSequence &query, const sequence &subject, bool reversed);
void push_match(Letter q, Letter s, bool positive);
void push_gap(Edit_operation op, int length, const char *subject);
void splice(const DiagonalSegment &d0, const DiagonalSegment &d1, const TranslatedSequence &query, const sequence &subject, bool reversed);
void set_begin(const DiagonalSegment &d, int dna_len);
void set_end(const DiagonalSegment &d, int dna_len);
......@@ -310,7 +311,7 @@ struct Hsp_context
{ return hsp_.score; }
double evalue() const
{
return score_matrix.evalue(score(), config.db_size, (unsigned)query.index(0).length());
return score_matrix.evalue(score(), (unsigned)query.index(0).length());
}
double bit_score() const
{
......
/****
DIAMOND protein aligner
Copyright (C) 2013-2018 Benjamin Buchfink <buchfink@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****/
#ifndef PARAMETERS_H_
#define PARAMETERS_H_
#include <stdint.h>
struct Parameters
{
Parameters(uint64_t db_seqs, uint64_t db_letters):
db_seqs(db_seqs),
db_letters(db_letters)
{}
const uint64_t db_seqs, db_letters;
};
#endif
\ No newline at end of file
......@@ -199,10 +199,11 @@ const Matrix_info Matrix_info::matrices[] = {
{ "PAM30", pam30_values, (const char*)NCBISM_Pam30.scores, PAM30_VALUES_MAX, 9, 1 }
};
Score_matrix::Score_matrix(const string & matrix, int gap_open, int gap_extend, int reward, int penalty, int frameshift):
Score_matrix::Score_matrix(const string & matrix, int gap_open, int gap_extend, int frameshift, uint64_t db_letters):
gap_open_ (gap_open == -1 ? Matrix_info::get(matrix).default_gap_open : gap_open),
gap_extend_ (gap_extend == -1 ? Matrix_info::get(matrix).default_gap_extend : gap_extend),
frame_shift_(frameshift),
db_letters_ ((double)db_letters),
constants_ (Matrix_info::get(matrix).get_constants(gap_open_, gap_extend_)),
name_(matrix),
matrix8_(Matrix_info::get(matrix).scores),
......@@ -288,9 +289,10 @@ const char* custom_scores(const string &matrix_file)
return scores;
}
Score_matrix::Score_matrix(const string &matrix_file, double lambda, double K, int gap_open, int gap_extend):
Score_matrix::Score_matrix(const string &matrix_file, double lambda, double K, int gap_open, int gap_extend, uint64_t db_letters):
gap_open_(gap_open),
gap_extend_(gap_extend),
db_letters_((double)db_letters),
name_("custom"),
matrix8_(custom_scores(matrix_file)),
bias_((char)(-low_score())),
......
/****
DIAMOND protein aligner
Copyright (C) 2013-2017 Benjamin Buchfink <buchfink@gmail.com>
Copyright (C) 2013-2018 Benjamin Buchfink <buchfink@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
......@@ -36,8 +36,8 @@ struct Score_matrix
{
Score_matrix() {}
Score_matrix(const string &matrix, int gap_open, int gap_extend, int reward, int penalty, int frame_shift);
Score_matrix(const string &matrix_file, double lambda, double K, int gap_open, int gap_extend);
Score_matrix(const string &matrix, int gap_open, int gap_extend, int frame_shift, uint64_t db_letters = 0);
Score_matrix(const string &matrix_file, double lambda, double K, int gap_open, int gap_extend, uint64_t db_letters = 0);
friend std::ostream& operator<<(std::ostream& s, const Score_matrix &m);
......@@ -80,11 +80,11 @@ struct Score_matrix
int rawscore(double bitscore) const
{ return (int)ceil(rawscore(bitscore, double ())); }
double evalue(int raw_score, size_t db_letters, unsigned query_len) const
{ return static_cast<double>(db_letters) * query_len * pow(2,-bitscore(raw_score)); }
double evalue(int raw_score, unsigned query_len) const
{ return db_letters_ * query_len * pow(2, -bitscore(raw_score)); }
double bitscore(double evalue, size_t db_letters, unsigned query_len) const
{ return -log(evalue/db_letters/query_len)/log(2); }
double bitscore(double evalue, unsigned query_len) const
{ return -log(evalue/db_letters_/query_len)/log(2); }
double lambda() const
{
......@@ -119,6 +119,16 @@ struct Score_matrix
return frame_shift_;
}
uint64_t db_letters() const
{
return (uint64_t)db_letters_;
}
void set_db_letters(uint64_t n)
{
db_letters_ = (double)n;
}
double avg_id_score() const;
private:
......@@ -142,6 +152,7 @@ private:
};
int gap_open_, gap_extend_, frame_shift_;
double db_letters_;
const double *constants_;
string name_;
Scores<int8_t> matrix8_;
......
......@@ -111,25 +111,34 @@ struct sequence
}
return os;
}
std::ostream& print(std::ostream &os, const Value_traits &v, Reversed) const
TextBuffer& print(TextBuffer &os, const Value_traits &v) const
{
for (unsigned i = 0; i < len_; ++i) {
long l = (long)data_[i];
if ((l & 128) == 0)
os << v.alphabet[l];
else
os << (char)tolower(v.alphabet[l & 127]);
}
return os;
}
TextBuffer& print(TextBuffer &os, const Value_traits &v, Reversed) const
{
for (int i = (int)len_ - 1; i >= 0; --i)
os << v.alphabet[(long)data_[i]];
os << v.alphabet[(long)(data_[i] & 127)];
return os;
}
sequence subseq(int begin, int end) const
{
return sequence(*this, begin, end - 1);
}
friend std::ostream& operator<<(std::ostream &os, const sequence &s)
friend TextBuffer& operator<<(TextBuffer &buf, const sequence &s)
{
return s.print(os, value_traits);
return s.print(buf, value_traits);
}
friend TextBuffer& operator<<(TextBuffer &buf, const sequence &s)
friend std::ostream& operator<<(std::ostream &buf, const sequence &s)
{
for(unsigned i=0;i<s.len_;++i)
buf << value_traits.alphabet[(long)s.data_[i]];
return buf;
return s.print(buf, value_traits);
}
static sequence get_window(const Letter *s, int window)
{
......