Steffen Möller · Steffen Möller · Steffen Möller · Steffen Möller · e8642d33 · e8642d33
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ quick example for setting up and using the program on Linux.
 Installing the software on your system may be done by downloading it in
 binary format for immediate use:

-    wget http://github.com/bbuchfink/diamond/releases/download/v0.9.28/diamond-linux64.tar.gz
+    wget http://github.com/bbuchfink/diamond/releases/download/v0.9.29/diamond-linux64.tar.gz
    tar xzf diamond-linux64.tar.gz

 The extracted `diamond` binary file should be moved to a directory

--- a/debian/changelog
+++ b/debian/changelog
+diamond-aligner (0.9.29+dfsg-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+
+ -- Steffen Moeller <moeller@debian.org>  Thu, 05 Dec 2019 00:17:29 +0100
+
 diamond-aligner (0.9.28+dfsg-1) unstable; urgency=medium

  * Team upload.

--- a/src/ChangeLog
+++ b/src/ChangeLog
+[0.9.29]
+- Fixed a bug that could cause taxonomy features to function incorrectly for databases created by versions 0.9.27 and 0.9.28.
+
 [0.9.28]
 - Fixed a bug that could cause alignment score overflows for scores > 65535 in frameshift alignment mode.
 - Fixed a clang compiler error.

--- a/src/align/align.cpp
+++ b/src/align/align.cpp
@@ -70,13 +70,13 @@ unique_ptr<Queue> Align_fetcher::queue_;
 vector<hit>::iterator Align_fetcher::it_;
 vector<hit>::iterator Align_fetcher::end_;

-void align_worker(size_t thread_id, const Parameters *params, const Metadata *metadata)
+void align_worker(size_t thread_id, const Parameters *params, const Metadata *metadata, const sequence *subjects, size_t subject_count)
 {
 	Align_fetcher hits;
 	Statistics stat;
 	DpStat dp_stat;
 	while (hits.get()) {
-		if (hits.end == hits.begin) {
+		if ((hits.end == hits.begin) && subjects == nullptr) {
 			TextBuffer *buf = 0;
 			if (!blocked_processing && *output_format != Output_format::daa && config.report_unaligned != 0) {
 				buf = new TextBuffer;
@@ -98,7 +98,7 @@ void align_worker(size_t thread_id, const Parameters *params, const Metadata *me
 		task_timer timer("Initializing mapper", hits.target_parallel ? 3 : UINT_MAX);
 		mapper->init();
 		timer.finish();
-		mapper->run(stat);
+		mapper->run(stat, subjects, subject_count);

 		timer.go("Generating output");
 		TextBuffer *buf = 0;
@@ -123,6 +123,13 @@ void align_queries(Trace_pt_buffer &trace_pts, Consumer* output_file, const Para
 {
 	const size_t max_size = (size_t)std::min(config.chunk_size*1e9 * 9 * 2 / config.lowmem, 2e9);
 	pair<size_t, size_t> query_range;
+	vector<sequence> subjects;
+	if (config.swipe_all) {
+		subjects.reserve(ref_seqs::get().get_length());
+		for (size_t i = 0; i < ref_seqs::get().get_length(); ++i)
+			subjects.push_back(ref_seqs::get()[i]);
+	}
+
 	while (true) {
 		task_timer timer("Loading trace points", 3);
 		Trace_pt_list *v = new Trace_pt_list;
@@ -142,7 +149,7 @@ void align_queries(Trace_pt_buffer &trace_pts, Consumer* output_file, const Para
 			threads.emplace_back(heartbeat_worker, query_range.second);
 		size_t n_threads = config.load_balancing == Config::query_parallel ? (config.threads_align == 0 ? config.threads_ : config.threads_align) : 1;
 		for (size_t i = 0; i < n_threads; ++i)
-			threads.emplace_back(align_worker, i, &params, &metadata);
+			threads.emplace_back(align_worker, i, &params, &metadata, subjects.empty() ? nullptr : subjects.data(), subjects.size());
 		for (auto &t : threads)
 			t.join();
 		timer.finish();

--- a/src/align/align.h
+++ b/src/align/align.h
@@ -75,7 +75,7 @@ namespace ExtensionPipeline {
 				QueryMapper(params, query_id, begin, end)
 			{}
 			Target& target(size_t i);
-			virtual void run(Statistics &stat);
+			virtual void run(Statistics &stat, const sequence *subjects = nullptr, size_t subject_count = 0);
 			virtual ~Pipeline() {}
 		};
 	}
@@ -85,7 +85,7 @@ namespace ExtensionPipeline {
 			Pipeline(const Parameters &params, size_t query_id, Trace_pt_list::iterator begin, Trace_pt_list::iterator end) :
 				QueryMapper(params, query_id, begin, end)
 			{}
-			virtual void run(Statistics &stat);
+			virtual void run(Statistics &stat, const sequence *subjects = nullptr, size_t subject_count = 0);
 			virtual ~Pipeline() {}
 		};
 	}
@@ -98,7 +98,7 @@ namespace ExtensionPipeline {
 				dp_stat(dp_stat)
 			{}
 			Target& target(size_t i);
-			virtual void run(Statistics &stat);
+			virtual void run(Statistics &stat, const sequence *subjects = nullptr, size_t subject_count = 0);
 			void run_swipe(bool score_only);
 			void range_ranking();
 			DpStat &dp_stat;

--- a/src/align/banded_swipe_pipeline.cpp
+++ b/src/align/banded_swipe_pipeline.cpp
@@ -154,16 +154,18 @@ void Pipeline::run_swipe(bool score_only)
 	vector<DpTarget> vf, vr;
 	for (size_t i = 0; i < n_targets(); ++i)
 		target(i).add(*this, vf, vr, (int)i);
+	list<Hsp> hsp;
 	if (score_matrix.frame_shift()) {
-		list<Hsp> hsp = banded_3frame_swipe(translated_query, FORWARD, vf.begin(), vf.end(), this->dp_stat, score_only, target_parallel);
+		hsp = banded_3frame_swipe(translated_query, FORWARD, vf.begin(), vf.end(), this->dp_stat, score_only, target_parallel);
 		hsp.splice(hsp.end(), banded_3frame_swipe(translated_query, REVERSE, vr.begin(), vr.end(), this->dp_stat, score_only, target_parallel));
-		while (!hsp.empty()) {
-			list<Hsp> &l = target(hsp.begin()->swipe_target).hsps;
-			l.splice(l.end(), hsp, hsp.begin());
-		}
 	}
 	else {
-		DP::BandedSwipe::swipe(query_seq(0), vf.begin(), vf.end(), Frame(0), score_only ? 0 : DP::TRACEBACK);
+		hsp = DP::BandedSwipe::swipe(query_seq(0), vf.begin(), vf.end(), Frame(0), query_cb[0], score_only ? 0 : DP::TRACEBACK);
+	}
+	
+	while (!hsp.empty()) {
+		list<Hsp> &l = target(hsp.begin()->swipe_target).hsps;
+		l.splice(l.end(), hsp, hsp.begin());
 	}
 }

@@ -177,7 +179,7 @@ void build_ranking_worker(PtrVector<::Target>::iterator begin, PtrVector<::Targe
 	}
 }

-void Pipeline::run(Statistics &stat)
+void Pipeline::run(Statistics &stat, const sequence *subjects, size_t subject_count)
 {
 	task_timer timer("Init banded swipe pipeline", target_parallel ? 3 : UINT_MAX);
 	Config::set_option(config.padding, 32);

--- a/src/align/greedy_pipeline.cpp
+++ b/src/align/greedy_pipeline.cpp
@@ -31,7 +31,7 @@ struct Target : public ::Target
 	void ungapped_stage(QueryMapper &mapper)
 	{
 		if (config.log_subject)
-			cout << "Subject = " << ref_ids::get()[subject_id].c_str() << endl;
+			cout << "Subject = " << ref_ids::get()[subject_block_id].c_str() << endl;
 		std::stable_sort(mapper.seed_hits.begin() + begin, mapper.seed_hits.begin() + end, Seed_hit::compare_diag);
 		typedef Map<vector<Seed_hit>::const_iterator, Seed_hit::Frame> Hit_map;
 		Hit_map hit_map(mapper.seed_hits.begin() + begin, mapper.seed_hits.begin() + end);
@@ -49,7 +49,7 @@ struct Target : public ::Target
 	void greedy_stage(QueryMapper &mapper, Statistics &stat, int cutoff)
 	{
 		if (config.log_subject)
-			cout << "Subject = " << ref_ids::get()[subject_id].c_str() << endl;
+			cout << "Subject = " << ref_ids::get()[subject_block_id].c_str() << endl;
 #ifdef ENABLE_TIMING
 		High_res_timer timer;
 #endif
@@ -75,7 +75,7 @@ struct Target : public ::Target
 	{
 		const size_t n = end - begin;
 		if (config.log_subject)
-			cout << "Subject = " << ref_ids::get()[subject_id].c_str() << endl;
+			cout << "Subject = " << ref_ids::get()[subject_block_id].c_str() << endl;

 		stat.inc(Statistics::CELLS, mapper.query_seq(0).length() * subject.length());

@@ -145,7 +145,7 @@ Target& Pipeline::target(size_t i)
 	return (Target&)(this->targets[i]);
 }

-void Pipeline::run(Statistics &stat)
+void Pipeline::run(Statistics &stat, const sequence *subjects, size_t subject_count)
 {
 	if (n_targets() == 0)
 		return;

--- a/src/align/query_mapper.cpp
+++ b/src/align/query_mapper.cpp
@@ -226,7 +226,7 @@ bool QueryMapper::generate_output(TextBuffer &buffer, Statistics &stat, const Me
 			|| score_matrix.bitscore(targets[i].filter_score) < config.min_bit_score)
 			break;

-		const size_t subject_id = targets[i].subject_id;
+		const size_t subject_id = targets[i].subject_block_id;
 		const unsigned subject_len = (unsigned)ref_seqs::get()[subject_id].length();
 		const char *ref_title = ref_ids::get()[subject_id].c_str();
 		targets[i].apply_filters(source_query_len, subject_len, query_title, ref_title);

--- a/src/align/query_mapper.h
+++ b/src/align/query_mapper.h
 /****
 DIAMOND protein aligner
-Copyright (C) 2013-2017 Benjamin Buchfink <buchfink@gmail.com>
+Copyright (C) 2013-2019 Benjamin Buchfink <buchfink@gmail.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -40,15 +40,15 @@ struct Target
 		filter_score(score)
 	{}
 	Target(size_t begin, unsigned subject_id) :
-		subject_id(subject_id),
+		subject_block_id(subject_id),
 		subject(ref_seqs::get()[subject_id]),
 		filter_score(0),
 		outranked(false),
 		begin(begin)
-	{}		
+	{}
 	static bool compare(Target* lhs, Target *rhs)
 	{
-		return lhs->filter_score > rhs->filter_score || (lhs->filter_score == rhs->filter_score && lhs->subject_id < rhs->subject_id);
+		return lhs->filter_score > rhs->filter_score || (lhs->filter_score == rhs->filter_score && lhs->subject_block_id < rhs->subject_block_id);
 	}
 	void fill_source_ranges(size_t query_len)
 	{
@@ -62,7 +62,7 @@ struct Target
 	bool is_enveloped(PtrVector<Target>::const_iterator begin, PtrVector<Target>::const_iterator end, double p, int min_score) const;
 	void inner_culling(int cutoff);
 	void apply_filters(int dna_len, int subject_len, const char *query_title, const char *ref_title);
-	unsigned subject_id;
+	unsigned subject_block_id;
 	sequence subject;
 	int filter_score;
 	float filter_time;
@@ -100,7 +100,7 @@ struct QueryMapper
 		for (size_t i = 0; i < targets.size(); ++i)
 			targets[i].fill_source_ranges(source_query_len);
 	}
-	virtual void run(Statistics &stat) = 0;
+	virtual void run(Statistics &stat, const sequence *subjects = nullptr, size_t subject_count = 0) = 0;
 	virtual ~QueryMapper() {}

 	const Parameters &parameters;

--- a/src/align/swipe_pipeline.cpp
+++ b/src/align/swipe_pipeline.cpp
 /****
 DIAMOND protein aligner
-Copyright (C) 2013-2017 Benjamin Buchfink <buchfink@gmail.com>
+Copyright (C) 2013-2019 Benjamin Buchfink <buchfink@gmail.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -16,23 +16,45 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ****/

+#include <vector>
+#include <map>
 #include "align.h"
 #include "query_mapper.h"
 #include "../data/reference.h"

+using std::vector;
+using std::map;
+
 namespace ExtensionPipeline { namespace Swipe {

-void Pipeline::run(Statistics &stat)
+void Pipeline::run(Statistics &stat, const sequence *subjects, size_t subject_count)
 {
-	const size_t n = targets.size();
-	vector<sequence> seqs(n);
-	for (size_t i = 0; i < n; ++i) {
-		seqs[i] = ref_seqs::get()[targets[i].subject_id];
+	list<Hsp> hsp;
+	if (subjects == nullptr) {
+		vector<sequence> seqs;
+		const size_t n = targets.size();
+		seqs.reserve(n);
+		for (size_t i = 0; i < n; ++i)
+			seqs.push_back(ref_seqs::get()[targets[i].subject_block_id]);
+		 hsp = DP::Swipe::swipe(query_seq(0), seqs.data(), seqs.data() + seqs.size(), raw_score_cutoff());
 	}
-	vector<int> scores = DP::Swipe::swipe(query_seq(0), seqs.data(), seqs.data() + seqs.size());
-	for (size_t i = 0; i < n; ++i) {
-		targets[i].hsps.push_back(Hsp(scores[i]));
-		targets[i].hsps.back().frame = 0;
+	else
+		hsp = DP::Swipe::swipe(query_seq(0), subjects, subjects + subject_count, raw_score_cutoff());	
+
+	map<unsigned, unsigned> subject_idx;
+	while (!hsp.empty()) {
+		unsigned i;
+		if (subjects == nullptr)
+			i = hsp.begin()->swipe_target;
+		else {
+			const auto it = subject_idx.emplace(hsp.begin()->swipe_target, (unsigned)targets.size());
+			if (it.second)
+				targets.push_back(new Target(0, hsp.begin()->swipe_target));
+			i = it.first->second;
+		}
+		targets[i].filter_score = hsp.begin()->score;
+		list<Hsp> &l = targets[i].hsps;		
+		l.splice(l.end(), hsp, hsp.begin());
 	}
 }


--- a/src/basic/basic.cpp
+++ b/src/basic/basic.cpp
@@ -24,7 +24,7 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #include "sequence.h"
 #include "masking.h"

-const char* Const::version_string = "0.9.28";
+const char* Const::version_string = "0.9.29";
 const char* Const::program_name = "diamond";
 const char* Const::id_delimiters = " \a\b\f\n\r\t\v\1";


--- a/src/basic/config.cpp
+++ b/src/basic/config.cpp
@@ -280,7 +280,8 @@ Config::Config(int argc, const char **argv)
 		("hard-masked", 0, "", hardmasked)
 		("cbs-window", 0, "", cbs_window, 40)
 		("no-unlink", 0, "", no_unlink)
-		("no-dict", 0, "", no_dict);
+		("no-dict", 0, "", no_dict)
+		("swipe", 0, "", swipe_all);
 		
 	parser.add(general).add(makedb).add(aligner).add(advanced).add(view_options).add(getseq_options).add(hidden_options);
 	parser.store(argc, argv, command);
@@ -452,5 +453,10 @@ Config::Config(int argc, const char **argv)
 	/*log_stream << "sizeof(hit)=" << sizeof(hit) << " sizeof(packed_uint40_t)=" << sizeof(packed_uint40_t)
 		<< " sizeof(sorted_list::entry)=" << sizeof(sorted_list::entry) << endl;*/

+	if (swipe_all) {
+		algo = double_indexed;
+		ext = swipe;
+	}
+
 	use_lazy_dict = false;
 }
--- a/src/basic/config.h
+++ b/src/basic/config.h
@@ -181,6 +181,7 @@ struct Config
 	int tantan_maxRepeatOffset;
 	bool tantan_ungapped;
 	string taxon_exclude;
+	bool swipe_all;

 	enum {
 		makedb = 0, blastp = 1, blastx = 2, view = 3, help = 4, version = 5, getseq = 6, benchmark = 7, random_seqs = 8, compare = 9, sort = 10, roc = 11, db_stat = 12, model_sim = 13,

--- a/src/basic/const.h
+++ b/src/basic/const.h
@@ -23,7 +23,7 @@ struct Const
 {

 	enum {
-		build_version = 129,
+		build_version = 130,
 		seedp_bits = 10,
 		seedp = 1<<seedp_bits,
 		max_seed_weight = 32,

--- a/src/basic/match.h
+++ b/src/basic/match.h
 /****
 DIAMOND protein aligner
-Copyright (C) 2013-2017 Benjamin Buchfink <buchfink@gmail.com>
+Copyright (C) 2013-2019 Benjamin Buchfink <buchfink@gmail.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -56,7 +56,7 @@ struct Hsp
 		sw_score(0)
 	{}

-	Hsp(int score) :
+	Hsp(int score, unsigned swipe_target = 0) :
 		score(unsigned(score)),
 		frame(0),
 		length(0),
@@ -65,7 +65,8 @@ struct Hsp
 		positives(0),
 		gap_openings(0),
 		gaps(0),
-		sw_score(0)
+		sw_score(0),
+		swipe_target(swipe_target)
 	{}

 	Hsp(const IntermediateRecord &r, unsigned query_source_len);

--- a/src/data/taxonomy.h
+++ b/src/data/taxonomy.h
@@ -49,6 +49,7 @@ struct Taxonomy
 			if (l > max_accesion_len)
 				throw AccessionLengthError();
 			std::copy(s, s + l, this->s);
+			std::fill(this->s + l, this->s + max_accesion_len, '\0');
 		}
 		Accession(const std::string &s)
 		{
@@ -58,8 +59,8 @@ struct Taxonomy
 				//this->s[0] = 0;
 				throw AccessionLengthError();
 			}
-			else
-				std::copy(t.c_str(), t.c_str() + t.length(), this->s);
+			std::copy(t.c_str(), t.c_str() + t.length(), this->s);
+			std::fill(this->s + t.length(), this->s + max_accesion_len, '\0');
 		}
 		bool operator<(const Accession &y) const
 		{

--- a/src/dp/comp_based_stats.cpp
+++ b/src/dp/comp_based_stats.cpp
@@ -98,6 +98,11 @@ Bias_correction::Bias_correction(const sequence &seq):
 			this->operator[](m) = (float)background_scores[(int)r] - float(scores.scores[(int)r] - score_matrix(r, r)) / (n - 1);
 		++m;
 	}
+
+	int16.reserve(seq.length());
+	for (float f : *this) {
+		int16.push_back(int16_t(f < 0.0f ? f - 0.5f : f + 0.5f));
+	}
 }

 int Bias_correction::operator()(const Hsp &hsp) const

--- a/src/dp/dp.h
+++ b/src/dp/dp.h
 /****
 DIAMOND protein aligner
-Copyright (C) 2013-2017 Benjamin Buchfink <buchfink@gmail.com>
+Copyright (C) 2013-2019 Benjamin Buchfink <buchfink@gmail.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -39,7 +39,7 @@ struct No_score_correction
 	{}
 };

-struct Bias_correction : public vector<float>
+struct Bias_correction : public std::vector<float>
 {
 	Bias_correction(const sequence &seq);
 	void operator()(float &score, int i, int query_anchor, int mult) const
@@ -48,6 +48,7 @@ struct Bias_correction : public vector<float>
 	}
 	int operator()(const Hsp &hsp) const;
 	int operator()(const Diagonal_segment &d) const;
+	std::vector<int16_t> int16;
 };

 struct Seed_hit
@@ -557,13 +558,13 @@ enum { TRACEBACK = 1 };
 	
 namespace Swipe {

-DECL_DISPATCH(std::vector<int>, swipe, (const sequence &query, const sequence *subject_begin, const sequence *subject_end))
+DECL_DISPATCH(std::list<Hsp>, swipe, (const sequence &query, const sequence *subject_begin, const sequence *subject_end, int score_cutoff))

 }

 namespace BandedSwipe {

-DECL_DISPATCH(void, swipe, (const sequence &query, std::vector<DpTarget>::iterator target_begin, std::vector<DpTarget>::iterator target_end, Frame frame, int flags))
+DECL_DISPATCH(std::list<Hsp>, swipe, (const sequence &query, std::vector<DpTarget>::iterator target_begin, std::vector<DpTarget>::iterator target_end, Frame frame, const Bias_correction &composition_bias, int flags))

 }


--- a/src/dp/score_vector.h
+++ b/src/dp/score_vector.h
@@ -222,285 +222,6 @@ struct score_vector<uint8_t>

 };

-template<>
-struct score_vector<int8_t>
-{
-
-	typedef int8_t Score;
-	enum { CHANNELS = 16 };
-
-	score_vector()
-	{
-		//data_ = _mm_set1_epi8(score_traits<uint8_t>::zero);
-		data_ = _mm_setzero_si128();
-	}
-
-	explicit score_vector(char x) :
-		data_(_mm_set(x))
-	{ }
-
-	explicit score_vector(__m128i data) :
-		data_(data)
-	{ }
-
-	score_vector(unsigned a, const __m128i &seq)
-	{
-#ifdef __SSSE3__
-		set_ssse3(a, seq);
-#else
-		set_generic(a, seq);
-#endif
-	}
-
-	score_vector(unsigned a, const __m128i &seq, const score_vector &bias)
-	{
-#ifdef __SSSE3__
-		set_ssse3(a, seq);
-#else
-		set_generic(a, seq);
-#endif
-	}
-
-	void set_ssse3(unsigned a, const __m128i &seq)
-	{
-#ifdef __SSSE3__
-		const __m128i *row = reinterpret_cast<const __m128i*>(&score_matrix.matrix8u()[a << 5]);
-
-		__m128i high_mask = _mm_slli_epi16(_mm_and_si128(seq, _mm_set1_epi8('\x10')), 3);
-		__m128i seq_low = _mm_or_si128(seq, high_mask);
-		__m128i seq_high = _mm_or_si128(seq, _mm_xor_si128(high_mask, _mm_set1_epi8('\x80')));
-
-		__m128i r1 = _mm_load_si128(row);
-		__m128i r2 = _mm_load_si128(row + 1);
-		__m128i s1 = _mm_shuffle_epi8(r1, seq_low);
-		__m128i s2 = _mm_shuffle_epi8(r2, seq_high);
-		data_ = _mm_or_si128(s1, s2);
-#endif
-	}
-
-	void set_generic(unsigned a, const __m128i &seq)
-	{
-		const uint8_t* row(&score_matrix.matrix8u()[a << 5]);
-		const uint8_t* seq_ptr(reinterpret_cast<const uint8_t*>(&seq));
-		uint8_t* dest(reinterpret_cast<uint8_t*>(&data_));
-		for (unsigned i = 0; i < 16; i++)
-			*(dest++) = row[*(seq_ptr++)];
-	}
-
-	score_vector(const uint8_t* s) :
-		data_(_mm_loadu_si128(reinterpret_cast<const __m128i*>(s)))
-	{ }
-
-	score_vector operator+(const score_vector &rhs) const
-	{
-		return score_vector(_mm_adds_epi8(data_, rhs.data_));
-	}
-
-	score_vector operator-(const score_vector &rhs) const
-	{
-		return score_vector(_mm_subs_epi8(data_, rhs.data_));
-	}
-
-	score_vector& operator-=(const score_vector &rhs)
-	{
-		data_ = _mm_subs_epi8(data_, rhs.data_);
-		return *this;
-	}
-
-	score_vector& operator++()
-	{
-		data_ = _mm_adds_epi8(data_, _mm_set(1));
-		return *this;
-	}
-
-	__m128i operator==(const score_vector &rhs) const
-	{
-		return _mm_cmpeq_epi8(data_, rhs.data_);
-	}
-
-	void unbias(const score_vector &bias)
-	{
-		this->operator -=(bias);
-	}
-
-	int operator [](unsigned i) const
-	{
-		return *(((uint8_t*)&data_) + i);
-	}
-
-	void set(unsigned i, uint8_t v)
-	{
-		*(((uint8_t*)&data_) + i) = v;
-	}
-
-	score_vector& max(const score_vector &rhs)
-	{
-#ifdef __SSE4_1__
-		data_ = _mm_max_epi8(data_, rhs.data_);
-#endif
-		return *this;
-	}
-
-	score_vector& min(const score_vector &rhs)
-	{
-#ifdef __SSE4_1__
-		data_ = _mm_min_epi8(data_, rhs.data_);
-#endif
-		return *this;
-	}
-
-	friend score_vector max(const score_vector& lhs, const score_vector &rhs)
-	{
-#ifdef __SSE4_1__
-		return score_vector(_mm_max_epi8(lhs.data_, rhs.data_));
-#else
-		return score_vector();
-#endif
-	}
-
-	friend score_vector min(const score_vector& lhs, const score_vector &rhs)
-	{
-#ifdef __SSE4_1__
-		return score_vector(_mm_min_epi8(lhs.data_, rhs.data_));
-#else
-		return score_vector();
-#endif
-	}
-
-	uint16_t cmpeq(const score_vector &rhs) const
-	{
-		return _mm_movemask_epi8(_mm_cmpeq_epi8(data_, rhs.data_));
-	}
-
-	__m128i cmpeq2(const score_vector &rhs) const
-	{
-		return _mm_cmpeq_epi8(data_, rhs.data_);
-	}
-
-	uint16_t cmpgt(const score_vector &rhs) const
-	{
-		return _mm_movemask_epi8(_mm_cmpgt_epi8(data_, rhs.data_));
-	}
-
-	void store(uint8_t *ptr) const
-	{
-		_mm_storeu_si128((__m128i*)ptr, data_);
-	}
-
-	friend std::ostream& operator<<(std::ostream &s, score_vector v)
-	{
-		uint8_t x[16];
-		v.store(x);
-		for (unsigned i = 0; i < 16; ++i)
-			printf("%3i ", (int)x[i]);
-		return s;
-	}
-
-	__m128i data_;
-
-};
-
-template<>
-struct score_vector<int16_t>
-{
-
-	score_vector():
-		data_(_mm_set1_epi16(SHRT_MIN))
-	{}
-
-	explicit score_vector(int x)
-	{
-		data_ = _mm_set1_epi16(x);
-	}
-
-	explicit score_vector(__m128i data) :
-		data_(data)
-	{ }
-
-	score_vector(unsigned a, uint64_t seq)
-	{
-		const uint16_t* row((uint16_t*)&score_matrix.matrix16()[a << 5]);
-		uint64_t b = uint64_t(row[seq & 0xff]);
-		seq >>= 8;
-		b |= uint64_t(row[seq & 0xff]) << 16;
-		seq >>= 8;
-		b |= uint64_t(row[seq & 0xff]) << 16 * 2;
-		seq >>= 8;
-		b |= uint64_t(row[seq & 0xff]) << 16 * 3;
-		seq >>= 8;
-		uint64_t c = uint64_t(row[seq & 0xff]);
-		seq >>= 8;
-		c |= uint64_t(row[seq & 0xff]) << 16;
-		seq >>= 8;
-		c |= uint64_t(row[seq & 0xff]) << 16 * 2;
-		seq >>= 8;
-		c |= uint64_t(row[seq & 0xff]) << 16 * 3;
-		data_ = _mm_set_epi64x(c, b);
-	}
-
-	score_vector(unsigned a, const __m128i &seq, const score_vector &bias)
-	{
-#ifdef __SSSE3__
-		const __m128i *row = reinterpret_cast<const __m128i*>(&score_matrix.matrix8u()[a << 5]);
-
-		__m128i high_mask = _mm_slli_epi16(_mm_and_si128(seq, _mm_set1_epi8('\x10')), 3);
-		__m128i seq_low = _mm_or_si128(seq, high_mask);
-		__m128i seq_high = _mm_or_si128(seq, _mm_xor_si128(high_mask, _mm_set1_epi8('\x80')));
-
-		__m128i r1 = _mm_load_si128(row);
-		__m128i r2 = _mm_load_si128(row + 1);
-		__m128i s1 = _mm_shuffle_epi8(r1, seq_low);
-		__m128i s2 = _mm_shuffle_epi8(r2, seq_high);
-		data_ = _mm_subs_epi16(_mm_and_si128(_mm_or_si128(s1, s2), _mm_set1_epi16(255)), bias.data_);
-#endif
-	}
-
-	score_vector operator+(const score_vector &rhs) const
-	{
-		return score_vector(_mm_adds_epi16(data_, rhs.data_));
-	}
-
-	score_vector operator-(const score_vector &rhs) const
-	{
-		return score_vector(_mm_subs_epi16(data_, rhs.data_));
-	}
-
-	score_vector& operator-=(const score_vector &rhs)
-	{
-		data_ = _mm_subs_epi16(data_, rhs.data_);
-		return *this;
-	}
-	
-	score_vector& max(const score_vector &rhs)
-	{
-		data_ = _mm_max_epi16(data_, rhs.data_);
-		return *this;
-	}
-
-	friend score_vector max(const score_vector& lhs, const score_vector &rhs)
-	{
-		return score_vector(_mm_max_epi16(lhs.data_, rhs.data_));
-	}
-
-	uint16_t cmpeq(const score_vector &rhs) const
-	{
-		return _mm_movemask_epi8(_mm_cmpeq_epi16(data_, rhs.data_));
-	}
-
-	__m128i cmpgt(const score_vector &rhs) const
-	{
-		return _mm_cmpgt_epi16(data_, rhs.data_);
-	}
-
-	void store(int16_t *ptr) const
-	{
-		_mm_storeu_si128((__m128i*)ptr, data_);
-	}
-
-	__m128i data_;
-
-};
-
 #endif

 template<typename _t>
@@ -544,38 +265,16 @@ inline void store_sv(int32_t sv, int32_t *dst)

 #ifdef __SSE2__

-template<>
-struct ScoreTraits<score_vector<int16_t>>
-{
-	enum { CHANNELS = 8 };
-	typedef int16_t Score;
-	static score_vector<int16_t> zero()
-	{
-		return score_vector<int16_t>();
-	}
-	static void saturate(score_vector<int16_t> &v)
-	{
-	}
-	static int16_t zero_score()
-	{
-		return SHRT_MIN;
-	}
-	static int int_score(Score s)
-	{
-		return (uint16_t)s ^ 0x8000;
-	}
-	static int16_t max_score()
-	{
-		return SHRT_MAX;
-	}
-};
-
 template<>
 struct ScoreTraits<score_vector<uint8_t>>
 {
+	enum { CHANNELS = 16 };
 	static score_vector<uint8_t> zero() {
 		return score_vector<uint8_t>();
 	}
+	static constexpr uint8_t max_score() {
+		return std::numeric_limits<uint8_t>::max();
+	}
 };

 template<typename _t, typename _p>
@@ -586,4 +285,24 @@ inline void store_sv(const score_vector<_t> &sv, _p *dst)

 #endif

+template<typename _sv>
+inline typename ScoreTraits<_sv>::Score extract_channel(const _sv &v, int i) {
+	return v[i];
+}
+
+template<>
+inline int extract_channel<int>(const int &v, int i) {
+	return v;
+}
+
+template<typename _sv>
+inline void set_channel(_sv &v, int i, typename ScoreTraits<_sv>::Score x) {
+	v.set(i, x);
+}
+
+template<>
+inline void set_channel<int>(int &v, int i, int x) {
+	v = x;
+}
+
 #endif /* SCORE_VECTOR_H_ */
--- a/src/dp/score_vector_int16.h
+++ b/src/dp/score_vector_int16.h
+/****
+DIAMOND protein aligner
+Copyright (C) 2013-2019 Benjamin Buchfink <buchfink@gmail.com>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+****/
+
+#include "score_vector.h"
+
+#ifndef SCORE_VECTOR_INT16_H_
+#define SCORE_VECTOR_INT16_H_
+
+#ifdef __SSE2__
+
+template<>
+struct score_vector<int16_t>
+{
+
+	score_vector() :
+		data_(_mm_set1_epi16(SHRT_MIN))
+	{}
+
+	explicit score_vector(int x)
+	{
+		data_ = _mm_set1_epi16(x);
+	}
+
+	explicit score_vector(int16_t x)
+	{
+		data_ = _mm_set1_epi16(x);
+	}
+
+	explicit score_vector(__m128i data) :
+		data_(data)
+	{ }
+
+	score_vector(unsigned a, uint64_t seq)
+	{
+		const uint16_t* row((uint16_t*)&score_matrix.matrix16()[a << 5]);
+		uint64_t b = uint64_t(row[seq & 0xff]);
+		seq >>= 8;
+		b |= uint64_t(row[seq & 0xff]) << 16;
+		seq >>= 8;
+		b |= uint64_t(row[seq & 0xff]) << 16 * 2;
+		seq >>= 8;
+		b |= uint64_t(row[seq & 0xff]) << 16 * 3;
+		seq >>= 8;
+		uint64_t c = uint64_t(row[seq & 0xff]);
+		seq >>= 8;
+		c |= uint64_t(row[seq & 0xff]) << 16;
+		seq >>= 8;
+		c |= uint64_t(row[seq & 0xff]) << 16 * 2;
+		seq >>= 8;
+		c |= uint64_t(row[seq & 0xff]) << 16 * 3;
+		data_ = _mm_set_epi64x(c, b);
+	}
+
+	score_vector(unsigned a, const __m128i &seq, const score_vector &bias)
+	{
+#ifdef __SSSE3__
+		const __m128i *row = reinterpret_cast<const __m128i*>(&score_matrix.matrix8u()[a << 5]);
+
+		__m128i high_mask = _mm_slli_epi16(_mm_and_si128(seq, _mm_set1_epi8('\x10')), 3);
+		__m128i seq_low = _mm_or_si128(seq, high_mask);
+		__m128i seq_high = _mm_or_si128(seq, _mm_xor_si128(high_mask, _mm_set1_epi8('\x80')));
+
+		__m128i r1 = _mm_load_si128(row);
+		__m128i r2 = _mm_load_si128(row + 1);
+		__m128i s1 = _mm_shuffle_epi8(r1, seq_low);
+		__m128i s2 = _mm_shuffle_epi8(r2, seq_high);
+		data_ = _mm_subs_epi16(_mm_and_si128(_mm_or_si128(s1, s2), _mm_set1_epi16(255)), bias.data_);
+#endif
+	}
+
+	score_vector operator+(const score_vector &rhs) const
+	{
+		return score_vector(_mm_adds_epi16(data_, rhs.data_));
+	}
+
+	score_vector operator-(const score_vector &rhs) const
+	{
+		return score_vector(_mm_subs_epi16(data_, rhs.data_));
+	}
+
+	score_vector& operator-=(const score_vector &rhs)
+	{
+		data_ = _mm_subs_epi16(data_, rhs.data_);
+		return *this;
+	}
+
+	score_vector& max(const score_vector &rhs)
+	{
+		data_ = _mm_max_epi16(data_, rhs.data_);
+		return *this;
+	}
+
+	friend score_vector max(const score_vector& lhs, const score_vector &rhs)
+	{
+		return score_vector(_mm_max_epi16(lhs.data_, rhs.data_));
+	}
+
+	uint16_t cmpeq(const score_vector &rhs) const
+	{
+		return _mm_movemask_epi8(_mm_cmpeq_epi16(data_, rhs.data_));
+	}
+
+	__m128i cmpgt(const score_vector &rhs) const
+	{
+		return _mm_cmpgt_epi16(data_, rhs.data_);
+	}
+
+	void store(int16_t *ptr) const
+	{
+		_mm_storeu_si128((__m128i*)ptr, data_);
+	}
+
+	int16_t operator[](int i) const {
+		int16_t d[8];
+		store(d);
+		return d[i];
+	}
+
+	void set(int i, int16_t x) {
+		int16_t d[8];
+		store(d);
+		d[i] = x;
+		data_ = _mm_loadu_si128((__m128i*)d);
+	}
+
+	__m128i data_;
+
+};
+
+template<>
+struct ScoreTraits<score_vector<int16_t>>
+{
+	enum { CHANNELS = 8 };
+	typedef int16_t Score;
+	static score_vector<int16_t> zero()
+	{
+		return score_vector<int16_t>();
+	}
+	static void saturate(score_vector<int16_t> &v)
+	{
+	}
+	static constexpr int16_t zero_score()
+	{
+		return SHRT_MIN;
+	}
+	static int int_score(Score s)
+	{
+		return (uint16_t)s ^ 0x8000;
+	}
+	static int16_t max_score()
+	{
+		return SHRT_MAX;
+	}
+};
+
+#endif
+
+#endif
\ No newline at end of file