Skip to content
Commits on Source (6)
cmake_minimum_required(VERSION 3.2)
project(spoa LANGUAGES CXX VERSION 1.1.5)
project(spoa LANGUAGES CXX VERSION 3.0.0)
include(GNUInstallDirs)
......@@ -7,13 +7,18 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
option(spoa_build_executable "Build spoa standalone tool" OFF)
option(spoa_build_tests "Build spoa unit tests" OFF)
option(spoa_optimize_for_native "Buiold spoa with march=native" ON)
if (spoa_optimize_for_native)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()
# build SPOA as a static library by default
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
......@@ -53,7 +58,7 @@ if (spoa_build_executable)
set_target_properties(spoa_bin PROPERTIES OUTPUT_NAME spoa)
install(TARGETS spoa_bin DESTINATION ${CMAKE_INSTALL_BINDIR})
endif(spoa_build_executable)
endif()
if (spoa_build_tests)
set(spoa_test_data_path ${PROJECT_SOURCE_DIR}/test/data/)
......@@ -69,7 +74,9 @@ if (spoa_build_tests)
if (NOT TARGET bioparser)
add_subdirectory(vendor/bioparser EXCLUDE_FROM_ALL)
endif()
if (NOT TARGET gtest_main)
add_subdirectory(vendor/googletest/googletest EXCLUDE_FROM_ALL)
endif()
target_link_libraries(spoa_test spoa bioparser gtest_main)
endif(spoa_build_tests)
endif()
......@@ -4,7 +4,7 @@
[![Build status for c++/clang++](https://travis-ci.org/rvaser/spoa.svg?branch=master)](https://travis-ci.org/rvaser/spoa)
[![Published in Genome Research](https://img.shields.io/badge/published%20in-Genome%20Research-blue.svg)](https://doi.org/10.1101/gr.214270.116)
Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap). It supports Intel SSE4.1+ and AVX2 vectorization (marginally faster due to high latency shifts).
Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap), and three gap modes: linear, affine and convex (piecewise affine). It supports Intel SSE4.1+ and AVX2 vectorization (marginally faster due to high latency shifts).
## Dependencies
......@@ -45,6 +45,7 @@ To build unit tests add `-Dspoa_build_tests=ON` while running `cmake`. After ins
Usage of spoa is as following:
```bash
spoa [options ...] <sequences>
<sequences>
......@@ -52,15 +53,26 @@ Usage of spoa is as following:
containing sequences
options:
-m, --match <int>
-m <int>
default: 5
score for matching bases
-x, --mismatch <int>
-n <int>
default: -4
score for mismatching bases
-g, --gap <int>
-g <int>
default: -8
gap penalty (must be negative)
gap opening penalty (must be non-positive)
-e <int>
default: -6
gap extension penalty (must be non-positive)
-q <int>
default: -10
gap opening penalty of the second affine function
(must be non-positive)
-c <int>
default: -4
gap extension penalty of the second affine function
(must be non-positive)
-l, --algorithm <int>
default: 0
alignment mode:
......@@ -80,6 +92,12 @@ Usage of spoa is as following:
-h, --help
prints the usage
gap mode:
linear if g >= e
affine if g <= q or e >= c
convex otherwise (default)
```
### Library
Simple library usage can be seen in the following `example.cpp` file. This code shows how to get consensus and multiple sequence alignment for a set of sequences without quality values.
......@@ -99,12 +117,12 @@ int main(int argc, char** argv) {
};
auto alignment_engine = spoa::createAlignmentEngine(static_cast<spoa::AlignmentType>(atoi(argv[1])),
atoi(argv[2]), atoi(argv[3]), atoi(argv[4]));
atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), atoi(argv[5]));
auto graph = spoa::createGraph();
for (const auto& it: sequences) {
auto alignment = alignment_engine->align_sequence_with_graph(it, graph);
auto alignment = alignment_engine->align(it, graph);
graph->add_alignment(alignment, it);
}
......@@ -136,7 +154,7 @@ g++ example.cpp -std=c++11 -lspoa -o example
The executable can be run with:
```bash
./example 0 5 -4 -8
./example 0 5 -4 -8 -6
```
On the other hand, if you are using `cmake` you can add spoa to your project by adding commands `add_subdirectory(vendor/spoa EXCLUDE_FROM_ALL)` and `target_link_libraries(your_exe spoa)` to your main CMakeLists file.
......
spoa (3.0.0-1) UNRELEASED; urgency=medium
* New upstream version
* Standards-Version: 4.4.0
TODO: Newer version of libbioparser-dev (see
https://github.com/rvaser/bioparser/issues/4 )
-- Andreas Tille <tille@debian.org> Fri, 12 Jul 2019 21:06:20 +0200
spoa (1.1.5-1) unstable; urgency=medium
* New upstream version
......
......@@ -10,7 +10,7 @@ Build-Depends: debhelper (>= 12~),
libbioparser-dev,
libgtest-dev,
zlib1g-dev
Standards-Version: 4.3.0
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/spoa
Vcs-Git: https://salsa.debian.org/med-team/spoa.git
Homepage: https://github.com/rvaser/spoa
......
Description: Do not build with -march
Bug-Debian: https://bugs.debian.org/911970
Author: Andreas Tille <tille@debian.org>
Last-Update: Sat, 03 Nov 2018 07:20:26 +0100
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PRO
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -march=native")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
use_debian_packaged_libs.patch
shared_and_static.patch
no_march.patch
# fix_soversion.patch
......@@ -4,8 +4,8 @@ Description: Build shared and static lib
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,9 +16,15 @@ option(spoa_build_executable "Build spoa
option(spoa_build_tests "Build spoa unit tests" OFF)
@@ -21,9 +21,15 @@ if (spoa_optimize_for_native)
endif()
# build SPOA as a static library by default
-set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build all libraries as shared")
......@@ -22,7 +22,7 @@ Description: Build shared and static lib
src/alignment_engine.cpp
src/graph.cpp
src/simd_alignment_engine.cpp
@@ -28,12 +34,17 @@ target_include_directories(spoa PUBLIC
@@ -33,12 +39,17 @@ target_include_directories(spoa PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
......
......@@ -4,7 +4,7 @@ Description: Use Debian packaged libraries
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,11 +45,7 @@ if (spoa_build_executable)
@@ -50,11 +50,7 @@ if (spoa_build_executable)
src/sequence.cpp
src/main.cpp)
......@@ -17,15 +17,17 @@ Description: Use Debian packaged libraries
set_target_properties(spoa_bin PROPERTIES OUTPUT_NAME spoa)
install(TARGETS spoa_bin DESTINATION ${CMAKE_INSTALL_BINDIR})
@@ -66,10 +62,5 @@ if (spoa_build_tests)
@@ -71,12 +67,5 @@ if (spoa_build_tests)
src/sequence.cpp
test/spoa_test.cpp)
- if (NOT TARGET bioparser)
- add_subdirectory(vendor/bioparser EXCLUDE_FROM_ALL)
- endif()
- if (NOT TARGET gtest_main)
- add_subdirectory(vendor/googletest/googletest EXCLUDE_FROM_ALL)
- endif()
-
- target_link_libraries(spoa_test spoa bioparser gtest_main)
+ target_link_libraries(spoa_test spoa gtest_main gtest z pthread)
endif(spoa_build_tests)
endif()
......@@ -6,7 +6,8 @@ export DEB_BUILD_MAINT_OPTIONS=hardening=+all
DEB_CMAKE_EXTRA_FLAGS = -DCMAKE_BUILD_TYPE=Release \
-Dspoa_build_executable=ON \
-Dspoa_build_tests=ON
-Dspoa_build_tests=ON \
-Dspoa_optimize_for_native=FALSE
%:
dh $@
......
......@@ -6,7 +6,7 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <string>
#include <memory>
#include <vector>
......@@ -20,34 +20,54 @@ enum class AlignmentType {
kOV // Overlap
};
enum class AlignmentSubtype {
kLinear,
kAffine,
kConvex
};
class Graph;
using Alignment = std::vector<std::pair<int32_t, int32_t>>;
using Alignment = std::vector<std::pair<std::int32_t, std::int32_t>>;
class AlignmentEngine;
std::unique_ptr<AlignmentEngine> createAlignmentEngine(
AlignmentType alignment_type, int8_t match, int8_t mismatch, int8_t gap);
std::unique_ptr<AlignmentEngine> createAlignmentEngine(AlignmentType type,
std::int8_t m, std::int8_t n, std::int8_t g);
std::unique_ptr<AlignmentEngine> createAlignmentEngine(AlignmentType type,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e);
std::unique_ptr<AlignmentEngine> createAlignmentEngine(AlignmentType type,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e,
std::int8_t q, std::int8_t c);
class AlignmentEngine {
public:
virtual ~AlignmentEngine() {}
virtual void prealloc(uint32_t max_sequence_size, uint32_t alphabet_size) = 0;
virtual void prealloc(std::uint32_t max_sequence_size,
std::uint32_t alphabet_size) = 0;
Alignment align_sequence_with_graph(const std::string& sequence,
Alignment align(const std::string& sequence,
const std::unique_ptr<Graph>& graph);
virtual Alignment align_sequence_with_graph(const char* sequence,
uint32_t sequence_size, const std::unique_ptr<Graph>& graph) = 0;
virtual Alignment align(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept = 0;
protected:
AlignmentEngine(AlignmentType alignment_type, int8_t match, int8_t mismatch,
int8_t gap);
AlignmentEngine(AlignmentType type, AlignmentSubtype subtype, std::int8_t m,
std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q,
std::int8_t c);
AlignmentEngine(const AlignmentEngine&) = delete;
const AlignmentEngine& operator=(const AlignmentEngine&) = delete;
AlignmentType alignment_type_;
int8_t match_;
int8_t mismatch_;
int8_t gap_;
AlignmentType type_;
AlignmentSubtype subtype_;
std::int8_t m_;
std::int8_t n_;
std::int8_t g_;
std::int8_t e_;
std::int8_t q_;
std::int8_t c_;
};
}
......@@ -6,7 +6,7 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
......@@ -21,7 +21,7 @@ class Edge;
class Graph;
std::unique_ptr<Graph> createGraph();
using Alignment = std::vector<std::pair<int32_t, int32_t>>;
using Alignment = std::vector<std::pair<std::int32_t, std::int32_t>>;
class Graph {
public:
......@@ -31,72 +31,80 @@ public:
return nodes_;
}
const std::vector<uint32_t>& rank_to_node_id() const {
const std::vector<std::uint32_t>& rank_to_node_id() const {
return rank_to_node_id_;
}
uint32_t num_codes() const {
std::uint32_t num_codes() const {
return num_codes_;
};
uint8_t coder(uint8_t c) const {
std::uint8_t coder(std::uint8_t c) const {
return coder_[c];
}
uint8_t decoder(uint8_t code) const {
std::uint8_t decoder(std::uint8_t code) const {
return decoder_[code];
}
void add_alignment(const Alignment& alignment, const std::string& sequence,
uint32_t weight = 1);
std::uint32_t weight = 1);
void add_alignment(const Alignment& alignment, const char* sequence,
uint32_t sequence_size, uint32_t weight = 1);
std::uint32_t sequence_size, std::uint32_t weight = 1);
void add_alignment(const Alignment& alignment, const std::string& sequence,
const std::string& quality);
void add_alignment(const Alignment& alignment, const char* sequence,
uint32_t sequence_size, const char* quality, uint32_t quality_size);
std::uint32_t sequence_size, const char* quality,
std::uint32_t quality_size);
void add_alignment(const Alignment& alignment, const std::string& sequence,
const std::vector<uint32_t>& weights);
const std::vector<std::uint32_t>& weights);
void add_alignment(const Alignment& alignment, const char* sequence,
uint32_t sequence_size, const std::vector<uint32_t>& weights);
std::uint32_t sequence_size, const std::vector<std::uint32_t>& weights);
void generate_multiple_sequence_alignment(std::vector<std::string>& dst,
bool include_consensus = false);
std::string generate_consensus();
// returns base coverages or complete summary matrix if verbose equals true
std::string generate_consensus(std::vector<uint32_t>& dst, bool verbose = false);
std::string generate_consensus(std::vector<std::uint32_t>& dst,
bool verbose = false);
std::unique_ptr<Graph> subgraph(uint32_t begin_node_id, uint32_t end_node_id,
std::vector<int32_t>& subgraph_to_graph_mapping) const;
std::unique_ptr<Graph> subgraph(std::uint32_t begin_node_id,
std::uint32_t end_node_id,
std::vector<std::int32_t>& subgraph_to_graph_mapping) const;
void update_alignment(Alignment& alignment,
const std::vector<int32_t>& subgraph_to_graph_mapping) const;
const std::vector<std::int32_t>& subgraph_to_graph_mapping) const;
void print_dot(const std::string& path) const;
void clear();
friend std::unique_ptr<Graph> createGraph();
private:
Graph();
Graph(const Graph&) = delete;
const Graph& operator=(const Graph&) = delete;
static std::unique_ptr<Node> createNode(uint32_t id, uint32_t code);
static std::unique_ptr<Node> createNode(std::uint32_t id, std::uint32_t code);
static std::unique_ptr<Edge> createEdge(uint32_t begin_node_id,
uint32_t end_node_id, uint32_t label, uint32_t weight);
static std::unique_ptr<Edge> createEdge(std::uint32_t begin_node_id,
std::uint32_t end_node_id, std::uint32_t label, std::uint32_t weight);
uint32_t add_node(uint32_t code);
std::uint32_t add_node(std::uint32_t code);
void add_edge(uint32_t begin_node_id, uint32_t end_node_id, uint32_t weight);
void add_edge(std::uint32_t begin_node_id, std::uint32_t end_node_id,
std::uint32_t weight);
int32_t add_sequence(const char* sequence, const std::vector<uint32_t>& weights,
uint32_t begin, uint32_t end);
std::int32_t add_sequence(const char* sequence,
const std::vector<std::uint32_t>& weights, std::uint32_t begin,
std::uint32_t end);
void topological_sort();
......@@ -104,34 +112,35 @@ private:
void traverse_heaviest_bundle();
uint32_t branch_completion(std::vector<int64_t>& scores,
std::vector<int32_t>& predecessors,
uint32_t rank);
std::uint32_t branch_completion(std::vector<std::int64_t>& scores,
std::vector<std::int32_t>& predecessors,
std::uint32_t rank);
void extract_subgraph_nodes(std::vector<bool>& dst, uint32_t current_node_id,
uint32_t end_node_id) const;
void extract_subgraph_nodes(std::vector<bool>& dst,
std::uint32_t current_node_id, std::uint32_t end_node_id) const;
uint32_t initialize_multiple_sequence_alignment(std::vector<uint32_t>& dst) const;
std::uint32_t initialize_multiple_sequence_alignment(
std::vector<std::uint32_t>& dst) const;
uint32_t num_sequences_;
uint32_t num_codes_;
std::vector<int32_t> coder_;
std::vector<int32_t> decoder_;
std::uint32_t num_sequences_;
std::uint32_t num_codes_;
std::vector<std::int32_t> coder_;
std::vector<std::int32_t> decoder_;
std::vector<std::unique_ptr<Node>> nodes_;
std::vector<uint32_t> rank_to_node_id_;
std::vector<uint32_t> sequences_begin_nodes_ids_;
std::vector<uint32_t> consensus_;
std::vector<std::uint32_t> rank_to_node_id_;
std::vector<std::uint32_t> sequences_begin_nodes_ids_;
std::vector<std::uint32_t> consensus_;
};
class Node {
public:
~Node();
uint32_t id() const {
std::uint32_t id() const {
return id_;
}
uint32_t code() const {
std::uint32_t code() const {
return code_;
}
......@@ -143,53 +152,55 @@ public:
return out_edges_;
}
const std::vector<uint32_t>& aligned_nodes_ids() const {
const std::vector<std::uint32_t>& aligned_nodes_ids() const {
return aligned_nodes_ids_;
}
bool successor(uint32_t& dst, uint32_t label) const;
bool successor(std::uint32_t& dst, std::uint32_t label) const;
uint32_t coverage() const;
std::uint32_t coverage() const;
friend Graph;
private:
Node(uint32_t id, uint32_t code);
Node(std::uint32_t id, std::uint32_t code);
Node(const Node&) = delete;
const Node& operator=(const Node&) = delete;
uint32_t id_;
uint32_t code_;
std::uint32_t id_;
std::uint32_t code_;
std::vector<std::shared_ptr<Edge>> in_edges_;
std::vector<std::shared_ptr<Edge>> out_edges_;
std::vector<uint32_t> aligned_nodes_ids_;
std::vector<std::uint32_t> aligned_nodes_ids_;
};
class Edge {
public:
~Edge();
uint32_t begin_node_id() const {
std::uint32_t begin_node_id() const {
return begin_node_id_;
}
uint32_t end_node_id() const {
std::uint32_t end_node_id() const {
return end_node_id_;
}
friend Graph;
friend Node;
private:
Edge(uint32_t begin_node_id, uint32_t end_node_id, uint32_t label,
uint32_t weight);
Edge(std::uint32_t begin_node_id, std::uint32_t end_node_id,
std::uint32_t label, std::uint32_t weight);
Edge(const Edge&) = delete;
const Edge& operator=(const Edge&) = delete;
void add_sequence(uint32_t label, uint32_t weight = 1);
void add_sequence(std::uint32_t label, std::uint32_t weight = 1);
uint32_t begin_node_id_;
uint32_t end_node_id_;
std::vector<uint32_t> sequence_labels_;
int64_t total_weight_;
std::uint32_t begin_node_id_;
std::uint32_t end_node_id_;
std::vector<std::uint32_t> sequence_labels_;
std::int64_t total_weight_;
};
}
......@@ -4,9 +4,9 @@
* @brief AlignmentEngine class source file
*/
#include <stdlib.h>
#include <limits>
#include <algorithm>
#include <exception>
#include "sisd_alignment_engine.hpp"
#include "simd_alignment_engine.hpp"
......@@ -14,45 +14,69 @@
namespace spoa {
std::unique_ptr<AlignmentEngine> createAlignmentEngine(
AlignmentType alignment_type, int8_t match, int8_t mismatch,
int8_t gap) {
std::unique_ptr<AlignmentEngine> createAlignmentEngine(AlignmentType type,
std::int8_t m, std::int8_t n, std::int8_t g) {
if (alignment_type != AlignmentType::kSW &&
alignment_type != AlignmentType::kNW &&
alignment_type != AlignmentType::kOV) {
return createAlignmentEngine(type, m, n, g, g);
}
std::unique_ptr<AlignmentEngine> createAlignmentEngine(AlignmentType type,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e) {
fprintf(stderr, "[spoa::createAlignmentEngine] error: "
"invalid alignment type!\n");
exit(1);
return createAlignmentEngine(type, m, n, g, e, g, e);
}
if (gap >= 0) {
fprintf(stderr, "[spoa::createAlignmentEngine] error: "
"gap penalty must be negative!\n");
exit(1);
std::unique_ptr<AlignmentEngine> createAlignmentEngine(AlignmentType type,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e,
std::int8_t q, std::int8_t c) {
if (type != AlignmentType::kSW &&
type != AlignmentType::kNW &&
type != AlignmentType::kOV) {
throw std::invalid_argument("[spoa::createAlignmentEngine] error: "
"invalid alignment type!");
}
if (g > 0 || q > 0) {
throw std::invalid_argument("[spoa::createAlignmentEngine] error: "
"gap opening penalty must be non-positive!");
}
if (e > 0 || c > 0) {
throw std::invalid_argument("[spoa::createAlignmentEngine] error: "
"gap extension penalty must be non-positive!");
}
AlignmentSubtype subtype = g >= e ?
AlignmentSubtype::kLinear : (g <= q || e >= c ?
AlignmentSubtype::kAffine : AlignmentSubtype::kConvex);
if (subtype == AlignmentSubtype::kLinear) {
e = g;
} else if (subtype == AlignmentSubtype::kAffine) {
q = g;
c = e;
}
auto alignment_engine = createSimdAlignmentEngine(alignment_type, match,
mismatch, gap);
auto alignment_engine = createSimdAlignmentEngine(type, subtype, m, n, g, e,
q, c);
if (alignment_engine == nullptr) {
return createSisdAlignmentEngine(alignment_type, match, mismatch, gap);
return createSisdAlignmentEngine(type, subtype, m, n, g, e, q, c);
}
return alignment_engine;
}
AlignmentEngine::AlignmentEngine(AlignmentType alignment_type, int8_t match,
int8_t mismatch, int8_t gap)
: alignment_type_(alignment_type), match_(match), mismatch_(mismatch),
gap_(gap) {
AlignmentEngine::AlignmentEngine(AlignmentType type, AlignmentSubtype subtype,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e,
std::int8_t q, std::int8_t c)
: type_(type), subtype_(subtype), m_(m), n_(n), g_(g), e_(e), q_(q), c_(c) {
}
Alignment AlignmentEngine::align_sequence_with_graph(const std::string& sequence,
Alignment AlignmentEngine::align(const std::string& sequence,
const std::unique_ptr<Graph>& graph) {
return this->align_sequence_with_graph(sequence.c_str(), sequence.size(), graph);
return align(sequence.c_str(), sequence.size(), graph);
}
}
This diff is collapsed.
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <cstdint>
#include <string>
#include <iostream>
#include <exception>
#include "sequence.hpp"
#include "spoa/spoa.hpp"
#include "bioparser/bioparser.hpp"
static const char* version = "v1.1.5";
static const std::string version = "v3.0.0";
static struct option options[] = {
{"match", required_argument, 0, 'm'},
{"mismatch", required_argument, 0, 'x'},
{"gap", required_argument, 0, 'g'},
{"algorithm", required_argument, 0, 'l'},
{"result", required_argument, 0, 'r'},
{"dot", required_argument, 0, 'd'},
{"version", no_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
{"algorithm", required_argument, nullptr, 'l'},
{"result", required_argument, nullptr, 'r'},
{"dot", required_argument, nullptr, 'd'},
{"version", no_argument, nullptr, 'v'},
{"help", no_argument, nullptr, 'h'},
{nullptr, 0, nullptr, 0}
};
void help();
int main(int argc, char** argv) {
int8_t match = 5;
int8_t mismatch = -4;
int8_t gap = -8;
std::int8_t m = 5;
std::int8_t n = -4;
std::int8_t g = -8;
std::int8_t e = -6;
std::int8_t q = -10;
std::int8_t c = -4;
uint8_t algorithm = 0;
uint8_t result = 0;
std::uint8_t algorithm = 0;
std::uint8_t result = 0;
std::string dot_path = "";
char opt;
while ((opt = getopt_long(argc, argv, "m:x:g:l:r:d:h", options, nullptr)) != -1) {
while ((opt = getopt_long(argc, argv, "m:n:g:e:q:c:l:r:d:h", options, nullptr)) != -1) {
switch (opt) {
case 'm':
match = atoi(optarg);
break;
case 'x':
mismatch = atoi(optarg);
break;
case 'g':
gap = atoi(optarg);
break;
case 'l':
algorithm = atoi(optarg);
break;
case 'r':
result = atoi(optarg);
break;
case 'd':
dot_path = optarg;
break;
case 'v':
printf("%s\n", version);
exit(0);
case 'h':
help();
exit(0);
default:
exit(1);
case 'm': m = atoi(optarg); break;
case 'n': n = atoi(optarg); break;
case 'g': g = atoi(optarg); break;
case 'e': e = atoi(optarg); break;
case 'q': q = atoi(optarg); break;
case 'c': c = atoi(optarg); break;
case 'l': algorithm = atoi(optarg); break;
case 'r': result = atoi(optarg); break;
case 'd': dot_path = optarg; break;
case 'v': std::cout << version << std::endl; return 0;
case 'h': help(); return 0;
default: return 1;
}
}
if (optind >= argc) {
fprintf(stderr, "[spoa::] error: missing input file!\n");
std::cerr << "[spoa::] error: missing input file!" << std::endl;
help();
exit(1);
return 1;
}
std::string sequences_path = argv[optind];
......@@ -92,45 +81,55 @@ int main(int argc, char** argv) {
sparser = bioparser::createParser<bioparser::FastqParser, spoa::Sequence>(
sequences_path);
} else {
fprintf(stderr, "[spoa::] error: "
"file %s has unsupported format extension (valid extensions: "
".fasta, .fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!\n",
sequences_path.c_str());
exit(1);
std::cerr << "[spoa::] error: file " << sequences_path <<
" has unsupported format extension (valid extensions: .fasta, "
".fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!" <<
std::endl;
return 1;
}
auto alignment_engine = spoa::createAlignmentEngine(
static_cast<spoa::AlignmentType>(algorithm), match, mismatch, gap);
std::unique_ptr<spoa::AlignmentEngine> alignment_engine;
try {
alignment_engine = spoa::createAlignmentEngine(
static_cast<spoa::AlignmentType>(algorithm), m, n, g, e, q, c);
} catch(std::invalid_argument& exception) {
std::cerr << exception.what() << std::endl;
return 1;
}
auto graph = spoa::createGraph();
std::vector<std::unique_ptr<spoa::Sequence>> sequences;
sparser->parse_objects(sequences, -1);
sparser->parse(sequences, -1);
size_t max_sequence_size = 0;
std::size_t max_sequence_size = 0;
for (const auto& it: sequences) {
max_sequence_size = std::max(max_sequence_size, it->data().size());
}
alignment_engine->prealloc(max_sequence_size, 4);
for (const auto& it: sequences) {
auto alignment = alignment_engine->align_sequence_with_graph(it->data(),
graph);
auto alignment = alignment_engine->align(it->data(), graph);
try {
graph->add_alignment(alignment, it->data(), it->quality());
} catch(std::invalid_argument& exception) {
std::cerr << exception.what() << std::endl;
return 1;
}
}
if (result == 0 || result == 2) {
std::string consensus = graph->generate_consensus();
fprintf(stdout, "Consensus (%zu)\n", consensus.size());
fprintf(stdout, "%s\n", consensus.c_str());
std::cout << "Consensus (" << consensus.size() << ")" << std::endl;
std::cout << consensus << std::endl;
}
if (result == 1 || result == 2) {
std::vector<std::string> msa;
graph->generate_multiple_sequence_alignment(msa);
fprintf(stdout, "Multiple sequence alignment\n");
std::cout << "Multiple sequence alignment" << std::endl;
for (const auto& it: msa) {
fprintf(stdout, "%s\n", it.c_str());
std::cout << it << std::endl;
}
}
......@@ -140,7 +139,7 @@ int main(int argc, char** argv) {
}
void help() {
printf(
std::cout <<
"usage: spoa [options ...] <sequences>\n"
"\n"
" <sequences>\n"
......@@ -148,15 +147,26 @@ void help() {
" containing sequences\n"
"\n"
" options:\n"
" -m, --match <int>\n"
" -m <int>\n"
" default: 5\n"
" score for matching bases\n"
" -x, --mismatch <int>\n"
" -n <int>\n"
" default: -4\n"
" score for mismatching bases\n"
" -g, --gap <int>\n"
" -g <int>\n"
" default: -8\n"
" gap penalty (must be negative)\n"
" gap opening penalty (must be non-positive)\n"
" -e <int>\n"
" default: -6\n"
" gap extension penalty (must be non-positive)\n"
" -q <int>\n"
" default: -10\n"
" gap opening penalty of the second affine function\n"
" (must be non-positive)\n"
" -c <int>\n"
" default: -4\n"
" gap extension penalty of the second affine function\n"
" (must be non-positive)\n"
" -l, --algorithm <int>\n"
" default: 0\n"
" alignment mode:\n"
......@@ -174,5 +184,10 @@ void help() {
" --version\n"
" prints the version number\n"
" -h, --help\n"
" prints the usage\n");
" prints the usage\n"
"\n"
" gap mode:\n"
" linear if g >= e\n"
" affine if g <= q or e >= c\n"
" convex otherwise (default)\n";
}
......@@ -8,14 +8,15 @@
namespace spoa {
Sequence::Sequence(const char* name, uint32_t name_size, const char* data,
uint32_t data_size)
Sequence::Sequence(const char* name, std::uint32_t name_size,
const char* data, std::uint32_t data_size)
: name_(name, name_size), data_(data, data_size), quality_(
data_size, 34) {
}
Sequence::Sequence(const char* name, uint32_t name_size, const char* data,
uint32_t data_size, const char* quality, uint32_t quality_size)
Sequence::Sequence(const char* name, std::uint32_t name_size,
const char* data, std::uint32_t data_size,
const char* quality, std::uint32_t quality_size)
: name_(name, name_size), data_(data, data_size), quality_(quality,
quality_size) {
}
......
......@@ -6,7 +6,7 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <memory>
#include <vector>
#include <string>
......@@ -23,7 +23,6 @@ namespace spoa {
class Sequence {
public:
~Sequence() = default;
const std::string& name() const {
......@@ -42,11 +41,11 @@ public:
friend bioparser::FastqParser<Sequence>;
private:
Sequence(const char* name, uint32_t name_size, const char* data,
uint32_t data_size);
Sequence(const char* name, uint32_t name_size, const char* data,
uint32_t data_size, const char* quality, uint32_t quality_size);
Sequence(const char* name, std::uint32_t name_size,
const char* data, std::uint32_t data_size);
Sequence(const char* name, std::uint32_t name_size,
const char* data, std::uint32_t data_size,
const char* quality, std::uint32_t quality_size);
Sequence(const Sequence&) = delete;
const Sequence& operator=(const Sequence&) = delete;
......
This diff is collapsed.
......@@ -6,11 +6,10 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include "spoa/alignment_engine.hpp"
......@@ -19,37 +18,51 @@ namespace spoa {
class Graph;
class SimdAlignmentEngine;
std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(
AlignmentType alignment_type, int8_t match, int8_t mismatch, int8_t gap);
std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(AlignmentType type,
AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
std::int8_t e, std::int8_t q, std::int8_t c);
class SimdAlignmentEngine: public AlignmentEngine {
public:
~SimdAlignmentEngine();
void prealloc(uint32_t max_sequence_size, uint32_t alphabet_size) override;
void prealloc(std::uint32_t max_sequence_size,
std::uint32_t alphabet_size) override;
Alignment align_sequence_with_graph(const char* sequence,
uint32_t sequence_size, const std::unique_ptr<Graph>& graph) override;
Alignment align(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept override;
friend std::unique_ptr<AlignmentEngine> createSimdAlignmentEngine(
AlignmentType alignment_type, int8_t match, int8_t mismatch, int8_t gap);
AlignmentType type, AlignmentSubtype subtype, std::int8_t m,
std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t q,
std::int8_t c);
private:
SimdAlignmentEngine(AlignmentType alignment_type, int8_t match,
int8_t mismatch, int8_t gap);
SimdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e,
std::int8_t q, std::int8_t c);
SimdAlignmentEngine(const SimdAlignmentEngine&) = delete;
const SimdAlignmentEngine& operator=(const SimdAlignmentEngine&) = delete;
template<typename T>
Alignment align(const char* sequence, uint32_t sequence_size,
Alignment linear(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
template<typename T>
Alignment affine(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
template<typename T>
Alignment convex(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
void realloc(uint32_t matrix_width, uint32_t matrix_height,
uint32_t num_codes);
void realloc(std::uint32_t matrix_width, std::uint32_t matrix_height,
std::uint32_t num_codes);
template<typename T>
void initialize(const char* sequence, const std::unique_ptr<Graph>& graph,
uint32_t normal_matrix_width, uint32_t matrix_width,
uint32_t matrix_height) noexcept;
std::uint32_t normal_matrix_width, std::uint32_t matrix_width,
std::uint32_t matrix_height) noexcept;
struct Implementation;
std::unique_ptr<Implementation> pimpl_;
......
This diff is collapsed.
......@@ -6,11 +6,10 @@
#pragma once
#include <stdint.h>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include "spoa/alignment_engine.hpp"
......@@ -19,33 +18,45 @@ namespace spoa {
class Graph;
class SisdAlignmentEngine;
std::unique_ptr<AlignmentEngine> createSisdAlignmentEngine(
AlignmentType alignment_type, int8_t match, int8_t mismatch, int8_t gap);
std::unique_ptr<AlignmentEngine> createSisdAlignmentEngine(AlignmentType type,
AlignmentSubtype subtype, std::int8_t m, std::int8_t n, std::int8_t g,
std::int8_t e, std::int8_t q, std::int8_t c);
class SisdAlignmentEngine: public AlignmentEngine {
public:
~SisdAlignmentEngine();
void prealloc(uint32_t max_sequence_size, uint32_t alphabet_size) override;
void prealloc(std::uint32_t max_sequence_size,
std::uint32_t alphabet_size) override;
Alignment align_sequence_with_graph(const char* sequence,
uint32_t sequence_size, const std::unique_ptr<Graph>& graph) override;
Alignment align(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept override;
friend std::unique_ptr<AlignmentEngine> createSisdAlignmentEngine(
AlignmentType alignment_type, int8_t match, int8_t mismatch, int8_t gap);
AlignmentType type, AlignmentSubtype subtype, std::int8_t m,
std::int8_t n, std::int8_t g, std::int8_t e, std::int8_t c,
std::int8_t q);
private:
SisdAlignmentEngine(AlignmentType alignment_type, int8_t match,
int8_t mismatch, int8_t gap);
SisdAlignmentEngine(AlignmentType type, AlignmentSubtype subtype,
std::int8_t m, std::int8_t n, std::int8_t g, std::int8_t e,
std::int8_t q, std::int8_t c);
SisdAlignmentEngine(const SisdAlignmentEngine&) = delete;
const SisdAlignmentEngine& operator=(const SisdAlignmentEngine&) = delete;
Alignment align(const char* sequence, uint32_t sequence_size,
Alignment linear(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
Alignment affine(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
Alignment convex(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
void realloc(uint32_t matrix_width, uint32_t matrix_height,
uint32_t num_codes);
void realloc(std::uint32_t matrix_width, std::uint32_t matrix_height,
std::uint32_t num_codes);
void initialize(const char* sequence, uint32_t sequence_size,
void initialize(const char* sequence, std::uint32_t sequence_size,
const std::unique_ptr<Graph>& graph) noexcept;
struct Implementation;
......