Skip to content
Commits on Source (7)
BasedOnStyle: Google
BreakBeforeBraces: Mozilla
AllowShortLoopsOnASingleLine: false
AccessModifierOffset: -4
BreakConstructorInitializersBeforeComma: true
ColumnLimit: 100
IndentWidth: 4
PointerAlignment: Left
TabWidth: 4
ReflowComments: false # protect ASCII art in comments
KeepEmptyLinesAtTheStartOfBlocks: true
......@@ -8,5 +8,11 @@ tags
defines.mk
all.xml
*.h5
libconfig.h
LibBlasrConfig.h
/hdf/hdf5-1.8.12-headers/
/build
# Meson WrapDB stuff
subprojects/packagecache/
subprojects/googletest*
subprojects/pbbam*
[submodule "pbbam"]
path = pbbam
url = ../pbbam.git
branch = master
[submodule "googletest"]
path = googletest
url = https://github.com/google/googletest.git
branch = master
##############################################
# CMake build script for the BLASRLIBCPP library
##############################################
cmake_policy(SET CMP0048 NEW)
project(BLASRLIBCPP VERSION 5.3.0 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.6)
set(ROOT_PROJECT_NAME ${PROJECT_NAME} CACHE STRING "root project name")
# Build type
IF(NOT CMAKE_BUILD_TYPE)
SET(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: Debug Release Profile RelWithDebInfo ReleaseWithAssert" FORCE)
ENDIF(NOT CMAKE_BUILD_TYPE)
# Main project paths
set(BLASRLIBCPP_RootDir ${BLASRLIBCPP_SOURCE_DIR})
# Project configuration
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake ${CMAKE_MODULE_PATH})
# Fixed order, do not sort or shuffle
include(blasrlibcpp-ccache)
include(blasrlibcpp-releasewithassert)
include(blasrlibcpp-dependencies)
include(blasrlibcpp-compilerflags)
include(blasrlibcpp-gitsha1)
file(WRITE pbdata/LibBlasrConfig.h "")
GET_PROPERTY(BLASRLIBCPP_COMPIPLE_FLAGS GLOBAL PROPERTY BLASRLIBCPP_COMPIPLE_FLAGS_GLOBAL)
GET_PROPERTY(BLASRLIBCPP_LINK_FLAGS GLOBAL PROPERTY BLASRLIBCPP_LINK_FLAGS_GLOBAL)
GET_PROPERTY(HDF5_LINKER_FLAG_LOCAL GLOBAL PROPERTY HDF5_LINKER_FLAG_GLOBAL)
set(LOCAL_LINKER_FLAGS "${HDF5_LINKER_FLAG_LOCAL} ${BLASRLIBCPP_LINK_FLAGS}")
# libcpp library
file(GLOB HDF5_CPP "${BLASRLIBCPP_RootDir}/hdf/*.cpp")
file(GLOB_RECURSE ALIGNMENT_CPP "${BLASRLIBCPP_RootDir}/alignment/*.cpp")
file(GLOB_RECURSE PBDATA_CPP "${BLASRLIBCPP_RootDir}/pbdata/*.cpp")
add_library(libcpp
${HDF5_CPP}
${ALIGNMENT_CPP}
${PBDATA_CPP}
)
target_include_directories(libcpp PUBLIC
${BLASRLIBCPP_RootDir}/hdf
${BLASRLIBCPP_RootDir}/alignment
${BLASRLIBCPP_RootDir}/pbdata
${HDF5_INCLUDE_DIRS}
${PacBioBAM_INCLUDE_DIRS}
)
target_link_libraries(libcpp ${PacBioBAM_LIBRARIES})
set_target_properties(libcpp PROPERTIES COMPILE_FLAGS ${BLASRLIBCPP_COMPIPLE_FLAGS})
if (LOCAL_LINKER_FLAGS)
set_target_properties(libcpp PROPERTIES LINK_FLAGS ${LOCAL_LINKER_FLAGS})
endif()
# Tests
enable_testing()
## build gtest
add_subdirectory(${BLASRLIBCPP_RootDir}/googletest/googletest external/gtest)
## build libcpp tests
file(GLOB_RECURSE TEST_CPP "${BLASRLIBCPP_RootDir}/unittest/*/*.cpp")
add_executable(libcpptest ${TEST_CPP})
target_include_directories(libcpptest PUBLIC ${BLASRLIBCPP_RootDir}/unittest)
target_link_libraries(libcpptest gtest_main libcpp hdf5 hdf5_cpp ${ZLIB_LDFLAGS})
add_test(libcpptest libcpptest)
set_target_properties(libcpptest PROPERTIES COMPILE_FLAGS "${BLASRLIBCPP_COMPIPLE_FLAGS}")
if (LOCAL_LINKER_FLAGS)
set_target_properties(libcpptest PROPERTIES LINK_FLAGS ${LOCAL_LINKER_FLAGS})
endif()
add_custom_target(check_libcpp
COMMAND libcpptest --gtest_output=xml:${CMAKE_BINARY_DIR}/libcpp-unit.xml
WORKING_DIRECTORY ${BLASRLIBCPP_RootDir})
# Developer environment
## Clang-format pre commit hook
### What?
The intention here is to get source code formatting automatically
checked and guaranteed before checkin.
### Why?
- Automate style-guide compliance to avoid squabbles
- Post-checkin reformatting binges mess up history. It's best to get
it right from the get-go.
### How?
See the tools:
- `tools/check-formatting --staged` for fast style-checking of
changed files, in a git commit/push hook;
- `tools/check-formatting --all` for slower style-checking of everything
- `tools/format-all`to format everything
To enable the checking as a pre-commit hook, place the following in
`.git/hooks/pre-commit` (and make that file executable):
```sh
#!/bin/bash
./tools/check-formatting --staged
```
### Tips
- We should *not* reformat bundled third-party code as it will make it
difficult to diff with upstream. Put such code in a subdirectory
with its own .clang-format file containing: "BasedOnStyle: None" to
disable formatting.
- If clang-format mangles something (for example, a comment block with
ASCII art or significant whitespace), you can protect a block using
`// clang-format off` and then `// clang-format on`.
* What are these binaries?
The `clang-format` binaries that are checked in are static builds of
clang-format v3.9 from https://github.com/angular/clang-format.
They are bundled because the LLVM toolchain has a reputation for
rapid change and incompatibility. We want to guarantee that devs
are using the same version as CI is.
......@@ -13,10 +13,38 @@
For more information, see
* https://github.com/PacificBiosciences/blasr_libcpp/wiki
## Building
## Building using make
The simplest way is:
```
NOPBBAM=1 ./configure.py
make -j all
```
That will skip pbbam, and it will download HDF5 headers.
## Building using cmake
Make sure that you are using cmake >=3.7 and
always start from an empty build subdirectory!
git clone git://github.com/PacificBiosciences/blasr_libcpp.git && cd blasr_libcpp
git submodule update --init --remote
mkdir build && cd build
cmake -GNinja .. && ninja check_libcpp
Is your HDF5 in a custom location?
cmake -GNinja -DHDF5_ROOT=/your/location/hdf-1.8.16 ..
Are HDF$ libraries and include folders in different locations?
cmake -GNinja -DHDF5_LIBRARIES=/your/location/hdf-1.8.16/lib
-DHDF5_INCLUDE_DIRS=/other/location/hdf-1.8.16/include ..
Prefer a custom libz implementation?
cmake -GNinja -DZLIB_INCLUDE_DIRS=/your/location/zlib/include \
-DZLIB_LIBRARIES=/your/location/zlib/libz.so ..
DISCLAIMER
----------
THIS WEBSITE AND CONTENT AND ALL SITE-RELATED SERVICES, INCLUDING ANY DATA, ARE PROVIDED "AS IS," WITH ALL FAULTS, WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, SATISFACTORY QUALITY, NON-INFRINGEMENT OR FITNESS FOR A PARTICULAR PURPOSE. YOU ASSUME TOTAL RESPONSIBILITY AND RISK FOR YOUR USE OF THIS SITE, ALL SITE-RELATED SERVICES, AND ANY THIRD PARTY WEBSITES OR APPLICATIONS. NO ORAL OR WRITTEN INFORMATION OR ADVICE SHALL CREATE A WARRANTY OF ANY KIND. ANY REFERENCES TO SPECIFIC PRODUCTS OR SERVICES ON THE WEBSITES DO NOT CONSTITUTE OR IMPLY A RECOMMENDATION OR ENDORSEMENT BY PACIFIC BIOSCIENCES.
#include <alignment/MappingMetrics.hpp>
#include <iomanip>
#include <unistd.h>
#include "MappingMetrics.hpp"
//In order to use clock_gettime in LINUX, add -lrt
#ifdef __APPLE__
#pragma weak clock_gettime
int clock_gettime(clockid_t clk_id, struct timespec *tp) {
#if MAC_OS_X_VERSION_MAX_ALLOWED < 101200
int my_clock_gettime_(clockid_t clk_id, struct timespec *tp)
{
kern_return_t ret;
clock_serv_t clk;
clock_id_t clk_serv_id;
......@@ -22,7 +25,8 @@ int clock_gettime(clockid_t clk_id, struct timespec *tp) {
case CLOCK_REALTIME:
case CLOCK_MONOTONIC:
clk_serv_id = clk_id == CLOCK_REALTIME ? CALENDAR_CLOCK : SYSTEM_CLOCK;
if (KERN_SUCCESS == (ret = host_get_clock_service(mach_host_self(), clk_serv_id, &clk))) {
if (KERN_SUCCESS ==
(ret = host_get_clock_service(mach_host_self(), clk_serv_id, &clk))) {
if (KERN_SUCCESS == (ret = clock_get_time(clk, &tm))) {
tp->tv_sec = tm.tv_sec;
tp->tv_nsec = tm.tv_nsec;
......@@ -58,9 +62,12 @@ int clock_gettime(clockid_t clk_id, struct timespec *tp) {
}
return retval;
}
#define clock_gettime my_clock_gettime_
#endif // MAC_OS_X_VERSION_MAX_ALLOWED < 101200
#endif // __APPLE__
Timer::Timer(std::string _header) {
Timer::Timer(std::string _header)
{
keepHistogram = false;
keepList = false;
totalElapsedClock = 0;
......@@ -69,34 +76,29 @@ Timer::Timer(std::string _header) {
elapsedTime = 0.0;
}
int Timer::ListSize() {
return msecList.size();
}
int Timer::ListSize() { return msecList.size(); }
void Timer::PrintHeader(std::ostream &out) {
void Timer::PrintHeader(std::ostream &out)
{
if (msecList.size() > 0) {
out << header << " ";
}
}
void Timer::PrintListValue(std::ostream &out, int index) {
void Timer::PrintListValue(std::ostream &out, int index)
{
if (msecList.size() > 0) {
out << msecList[index] << " ";
}
}
void Timer::Tick() {
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cpuclock[0]);
}
void Timer::Tick() { clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cpuclock[0]); }
void Timer::SetStoreElapsedTime(bool value) {
keepList = value;
}
void Timer::SetStoreElapsedTime(bool value) { keepList = value; }
void Timer::SetStoreHistgram(bool value) {
keepHistogram = value;
}
void Timer::SetStoreHistgram(bool value) { keepHistogram = value; }
void Timer::Tock() {
void Timer::Tock()
{
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cpuclock[1]);
elapsedClockMsec = (cpuclock[1].tv_nsec - cpuclock[0].tv_nsec) / 1000;
totalElapsedClock += elapsedClockMsec;
......@@ -105,8 +107,7 @@ void Timer::Tock() {
// keep a histogram in number of milliseconds per operation
if (histogram.find(elapsedClockMsec) == histogram.end()) {
histogram[elapsedClockMsec] = 1;
}
else {
} else {
histogram[elapsedClockMsec]++;
}
}
......@@ -115,30 +116,24 @@ void Timer::Tock() {
}
}
void Timer::Add(const Timer &rhs) {
void Timer::Add(const Timer &rhs)
{
elapsedClockMsec += rhs.elapsedClockMsec;
elapsedTime += rhs.elapsedTime;
totalElapsedClock += rhs.totalElapsedClock;
msecList.insert(msecList.end(), rhs.msecList.begin(), rhs.msecList.end());
}
void Timer::SetHeader(std::string _header) {
header = _header;
}
void Timer::SetHeader(std::string _header) { header = _header; }
void MappingClocks::AddCells(int nCells) {
nCellsPerSample.push_back(nCells);
}
void MappingClocks::AddCells(int nCells) { nCellsPerSample.push_back(nCells); }
void MappingClocks::AddBases(int nBases) {
nBasesPerSample.push_back(nBases);
}
void MappingClocks::AddBases(int nBases) { nBasesPerSample.push_back(nBases); }
int MappingClocks::GetSize() {
return total.ListSize();
}
int MappingClocks::GetSize() { return total.ListSize(); }
MappingClocks::MappingClocks() {
MappingClocks::MappingClocks()
{
total.SetHeader("Total");
findAnchors.SetHeader("FindAnchors");
mapToGenome.SetHeader("MapToGenome");
......@@ -147,7 +142,8 @@ MappingClocks::MappingClocks() {
alignIntervals.SetHeader("AlignIntervals");
}
void MappingClocks::PrintHeader(std::ostream &out) {
void MappingClocks::PrintHeader(std::ostream &out)
{
total.PrintHeader(out);
findAnchors.PrintHeader(out);
mapToGenome.PrintHeader(out);
......@@ -156,7 +152,8 @@ void MappingClocks::PrintHeader(std::ostream &out) {
alignIntervals.PrintHeader(out);
}
void MappingClocks::PrintList(std::ostream &out, int index) {
void MappingClocks::PrintList(std::ostream &out, int index)
{
total.PrintListValue(out, index);
findAnchors.PrintListValue(out, index);
......@@ -173,7 +170,8 @@ void MappingClocks::PrintList(std::ostream &out, int index) {
out << std::endl;
}
void MappingClocks::SetStoreList(bool value) {
void MappingClocks::SetStoreList(bool value)
{
total.SetStoreElapsedTime(value);
findAnchors.SetStoreElapsedTime(value);
mapToGenome.SetStoreElapsedTime(value);
......@@ -182,7 +180,8 @@ void MappingClocks::SetStoreList(bool value) {
alignIntervals.SetStoreElapsedTime(value);
}
void MappingClocks::AddClockTime(const MappingClocks &rhs) {
void MappingClocks::AddClockTime(const MappingClocks &rhs)
{
total.Add(rhs.total);
findAnchors.Add(rhs.findAnchors);
mapToGenome.Add(rhs.mapToGenome);
......@@ -191,7 +190,8 @@ void MappingClocks::AddClockTime(const MappingClocks &rhs) {
alignIntervals.Add(rhs.alignIntervals);
}
MappingMetrics::MappingMetrics() {
MappingMetrics::MappingMetrics()
{
numReads = 0;
numMappedReads = 0;
numMappedBases = 0;
......@@ -199,56 +199,55 @@ MappingMetrics::MappingMetrics() {
totalAnchorsForMappedReads = 0;
totalAnchors = 0;
}
void MappingMetrics::StoreSDPPoint(int nBases, int nSDPAnchors, int nClock) {
void MappingMetrics::StoreSDPPoint(int nBases, int nSDPAnchors, int nClock)
{
sdpBases.push_back(nBases);
sdpAnchors.push_back(nSDPAnchors);
sdpClock.push_back(nClock);
}
void MappingMetrics::SetStoreList(bool value) {
clocks.SetStoreList(value);
}
void MappingMetrics::SetStoreList(bool value) { clocks.SetStoreList(value); }
void MappingMetrics::PrintSeconds(std::ostream& out, long sec ){
out << sec << " Msec";
}
void MappingMetrics::PrintSeconds(std::ostream &out, long sec) { out << sec << " Msec"; }
void MappingMetrics::PrintFraction(std::ostream &out, float frac) {
void MappingMetrics::PrintFraction(std::ostream &out, float frac)
{
out << std::setprecision(2) << frac;
}
void MappingMetrics::RecordNumAlignedBases(int nBases) {
mappedBases.push_back(nBases);
}
void MappingMetrics::RecordNumAlignedBases(int nBases) { mappedBases.push_back(nBases); }
void MappingMetrics::RecordNumCells(int nCells) {
cellsPerAlignment.push_back(nCells);
}
void MappingMetrics::RecordNumCells(int nCells) { cellsPerAlignment.push_back(nCells); }
void MappingMetrics::Collect(MappingMetrics &rhs) {
void MappingMetrics::Collect(MappingMetrics &rhs)
{
clocks.AddClockTime(rhs.clocks);
totalAnchors += rhs.totalAnchors;
numReads += rhs.numReads;
numMappedReads += rhs.numMappedReads;
totalAnchorsForMappedReads += rhs.totalAnchorsForMappedReads;
mappedBases.insert(mappedBases.end(), rhs.mappedBases.begin(), rhs.mappedBases.end());
cellsPerAlignment.insert(cellsPerAlignment.end(), rhs.cellsPerAlignment.begin(), rhs.cellsPerAlignment.end());
cellsPerAlignment.insert(cellsPerAlignment.end(), rhs.cellsPerAlignment.begin(),
rhs.cellsPerAlignment.end());
}
void MappingMetrics::CollectSDPMetrics(MappingMetrics &rhs) {
void MappingMetrics::CollectSDPMetrics(MappingMetrics &rhs)
{
sdpAnchors.insert(sdpAnchors.end(), rhs.sdpAnchors.begin(), rhs.sdpAnchors.end());
sdpBases.insert(sdpBases.end(), rhs.sdpBases.begin(), rhs.sdpBases.end());
sdpClock.insert(sdpClock.end(), rhs.sdpClock.begin(), rhs.sdpClock.end());
}
void MappingMetrics::PrintSDPMetrics(std::ostream &out) {
void MappingMetrics::PrintSDPMetrics(std::ostream &out)
{
out << "nbases ncells time" << std::endl;
for (size_t i = 0; i < sdpAnchors.size(); i++) {
out << sdpBases[i] << " " << sdpAnchors[i] << " " << sdpClock[i] << std::endl;
}
}
void MappingMetrics::PrintFullList(std::ostream &out) {
void MappingMetrics::PrintFullList(std::ostream &out)
{
//
// Print the full header
//
......@@ -260,11 +259,12 @@ void MappingMetrics::PrintFullList(std::ostream &out) {
int i;
for (i = 0; i < clocks.GetSize(); i++) {
clocks.PrintList(out, i);
// out << mappedBases[i] << " " << cellsPerAlignment[i] << endl;
// out << mappedBases[i] << " " << cellsPerAlignment[i] << std::endl;
}
}
void MappingMetrics::PrintSummary(std::ostream &out) {
void MappingMetrics::PrintSummary(std::ostream &out)
{
out << "Examined " << numReads << std::endl;
out << "Mapped " << numMappedReads << std::endl;
out << "Total mapping time\t";
......@@ -290,9 +290,8 @@ void MappingMetrics::PrintSummary(std::ostream &out) {
out << "Total anchors: " << totalAnchors << std::endl;
out << " Anchors per read: " << (1.0 * totalAnchors) / numReads << std::endl;
out << "Total mapped: " << totalAnchorsForMappedReads << std::endl;
out << " Anchors per mapped read: " << (1.0*totalAnchorsForMappedReads) / numMappedReads << std::endl;
out << " Anchors per mapped read: " << (1.0 * totalAnchorsForMappedReads) / numMappedReads
<< std::endl;
}
void MappingMetrics::AddClock(MappingClocks &clocks) {
clocks.AddClockTime(clocks);
}
void MappingMetrics::AddClock(MappingClocks &clocks) { clocks.AddClockTime(clocks); }
#ifndef _BLASR_MAPPING_METRICS_HPP_
#define _BLASR_MAPPING_METRICS_HPP_
#include <ctime>
#include <iostream>
#include <time.h>
#include <map>
#include <vector>
//In order to use clock_gettime in LINUX, add -lrt
#ifdef __APPLE__
#include <mach/mach.h>
#include <AvailabilityMacros.h>
#if MAC_OS_X_VERSION_MAX_ALLOWED < 101200
#include <mach/clock.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <errno.h>
#include <cerrno>
typedef enum {
CLOCK_REALTIME,
......@@ -20,10 +23,12 @@ typedef enum {
} clockid_t;
static mach_timebase_info_data_t __clock_gettime_inf;
int clock_gettime(clockid_t clk_id, struct timespec *tp);
int my_clock_gettime_(clockid_t clk_id, struct timespec *tp);
#endif // MAC_OS_X_VERSION_MAX_ALLOWED < 101200
#endif // __APPLE__
class Timer {
class Timer
{
public:
bool keepHistogram, keepList;
timespec cpuclock[2];
......@@ -34,7 +39,6 @@ public:
long long totalElapsedClock;
std::string header;
Timer(std::string _header = "");
int ListSize();
......@@ -54,11 +58,10 @@ public:
void Add(const Timer &rhs);
void SetHeader(std::string _header);
};
class MappingClocks {
class MappingClocks
{
public:
Timer total;
Timer findAnchors;
......@@ -86,8 +89,8 @@ public:
void AddClockTime(const MappingClocks &rhs);
};
class MappingMetrics {
class MappingMetrics
{
public:
MappingClocks clocks;
int numReads;
......@@ -128,6 +131,4 @@ public:
void AddClock(MappingClocks &clocks);
};
#endif // _BLASR_MAPPING_METRICS_HPP_
......@@ -2,27 +2,23 @@
#define _BLASR_AFFINE_KBAND_ALIGN_HPP_
#include <cassert>
#include <vector>
#include <iostream>
#include "../../../pbdata/NucConversion.hpp"
#include "../../../pbdata/defs.h"
#include "../../../pbdata/matrix/FlatMatrix.hpp"
#include "../../datastructures/alignment/Alignment.hpp"
#include "KBandAlign.hpp"
#include <vector>
#include <pbdata/defs.h>
#include <alignment/algorithms/alignment/KBandAlign.hpp>
#include <alignment/datastructures/alignment/Alignment.hpp>
#include <pbdata/NucConversion.hpp>
#include <pbdata/matrix/FlatMatrix.hpp>
template <typename T_QuerySequence, typename T_TargetSequence, typename T_Alignment>
int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
int matchMat[5][5],
int hpInsOpen, int hpInsExtend, int insOpen, int insExtend,
int del, int k,
vector<int> &scoreMat,
vector<Arrow> & pathMat,
vector<int> &hpInsScoreMat,
vector<Arrow> &hpInsPathMat,
vector<int> &insScoreMat,
vector<Arrow> &insPathMat,
T_Alignment &alignment,
AlignmentType alignType) {
int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq, int matchMat[5][5],
int hpInsOpen, int hpInsExtend, int insOpen, int insExtend, int del, int k,
std::vector<int> &scoreMat, std::vector<Arrow> &pathMat,
std::vector<int> &hpInsScoreMat, std::vector<Arrow> &hpInsPathMat,
std::vector<int> &insScoreMat, std::vector<Arrow> &insPathMat,
T_Alignment &alignment, AlignmentType alignType)
{
//
// Make a copy of the sequences that is guaranteed to be in 3-bit format
......@@ -39,7 +35,6 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
tSeq.seq = ptSeq.seq;
tSeq.length = ptSeq.length;
DNALength tLen, qLen;
SetKBoundedLengths(tSeq.length, qSeq.length, k, tLen, qLen);
......@@ -59,7 +54,6 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
DNALength nCols = 2 * k + 1;
VectorIndex totalMatSize = (qLen + 1) * nCols;
//
// For now the scoreMat and path mat maintained outside this
// function so that they may have different sizes from the affine
......@@ -101,8 +95,7 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
insScoreMat[rc2index(q, k - q, nCols)] = q * insExtend + insOpen;
insPathMat[rc2index(q, k - q, nCols)] = AffineInsUp;
}
}
else if (alignType == TargetFit) {
} else if (alignType == TargetFit) {
//
// Allow free gap penalties at the beginning of the alignment.
//
......@@ -114,7 +107,6 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
}
}
//
// Assign score for (0,0) position in matrix -- aligning a gap to a gap
// which should just be a finished alignment. There is no cost for
......@@ -129,7 +121,8 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
}
for (t = k + 1; t < static_cast<int>(nCols); t++) {
hpInsScoreMat[rc2index(0, t, nCols)] = INF_SCORE;// hpInsOpen + (t - k) * hpInsExtend;//; //INF_SCORE;
hpInsScoreMat[rc2index(0, t, nCols)] =
INF_SCORE; // hpInsOpen + (t - k) * hpInsExtend;//; //INF_SCORE;
hpInsPathMat[t] = NoArrow; //AffineHPInsOpen ; //NoArrow;
insScoreMat[t] = INF_SCORE; //insOpen + (t - k) * insExtend; //INF_SCORE;
insPathMat[t] = NoArrow; //AffineInsOpen; //NoArrow;
......@@ -144,7 +137,6 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
pathMat[rc2index(0, t + k, nCols)] = Left;
}
//
// The recurrence relation here is a slight modification of the
// standard affine gap alignment. Deletions are non-affine. Insertions
......@@ -152,7 +144,6 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
// an affine score for mixed insertions.
//
int matchScore, delScore;
int hpInsExtendScore, hpInsOpenScore, insOpenScore, insExtendScore;
int minHpInsScore, minInsScore;
......@@ -184,8 +175,7 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
hpInsExtendScore = hpInsScoreMat[upper] + hpInsExtend;
else
hpInsExtendScore = INF_SCORE;
}
else {
} else {
hpInsExtendScore = INF_SCORE;
}
......@@ -197,8 +187,7 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
if (hpInsOpenScore < hpInsExtendScore) {
hpInsPathMat[curIndex] = AffineHPInsOpen;
minHpInsScore = hpInsOpenScore;
}
else {
} else {
hpInsPathMat[curIndex] = AffineHPInsUp;
minHpInsScore = hpInsExtendScore;
}
......@@ -207,8 +196,7 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
if (t < q + k) {
insOpenScore = scoreMat[upper] + insOpen;
insExtendScore = insScoreMat[upper] + insExtend;
}
else {
} else {
insOpenScore = INF_SCORE;
insExtendScore = INF_SCORE;
}
......@@ -216,20 +204,17 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
if (insOpenScore < insExtendScore) {
insPathMat[curIndex] = AffineInsOpen;
minInsScore = insOpenScore;
}
else {
} else {
insPathMat[curIndex] = AffineInsUp;
minInsScore = insExtendScore;
}
insScoreMat[curIndex] = minInsScore;
// On left boundary of k-band.
// do not allow deletions of t.
if (t == q - k) {
delScore = INF_SCORE;
}
else {
} else {
// cur row = q
// cur col = t - q
// prev col therefore t - q - 1
......@@ -247,7 +232,8 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
assert(rc2index(q - 1, k + t - q, nCols) < scoreMat.size());
assert(t - 1 >= 0);
assert(q - 1 >= 0);
matchScore = scoreMat[rc2index(q - 1, k + t - q, nCols)] + matchMat[ThreeBit[qSeq.seq[q-1]]][ThreeBit[tSeq.seq[t-1]]];
matchScore = scoreMat[rc2index(q - 1, k + t - q, nCols)] +
matchMat[ThreeBit[qSeq.seq[q - 1]]][ThreeBit[tSeq.seq[t - 1]]];
//
// Possibly on right boundary of k-band, in which
......@@ -259,14 +245,11 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
scoreMat[curIndex] = minScore;
if (minScore == matchScore) {
pathMat[curIndex] = Diagonal;
}
else if (minScore == delScore) {
} else if (minScore == delScore) {
pathMat[curIndex] = Left;
}
else if (minScore == minInsScore) {
} else if (minScore == minInsScore) {
pathMat[curIndex] = AffineInsClose;
}
else {
} else {
pathMat[curIndex] = AffineHPInsClose;
}
}
......@@ -286,7 +269,7 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
std::cout << "normal affine ins path: " << std::endl;
PrintFlatMatrix(&insPathMat[0], qLen + 1, nCols, std::cout);
*/
vector<Arrow> optAlignment;
std::vector<Arrow> optAlignment;
// First find the end position matrix.
int minScoreTPos, minScore;
......@@ -294,15 +277,18 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
if (alignType == Global) {
q = qLen;
t = k - (static_cast<int>(qLen) - static_cast<int>(tLen));
}
else if (alignType == QueryFit) {
} else if (alignType == QueryFit) {
q = qLen;
minScoreTPos = max(q-k,1);
minScoreTPos = std::max(q - k, 1);
DNALength index = rc2index(qLen, k + minScoreTPos - q, nCols);
minScore = scoreMat[index];
for (t = q - k; t < q + k + 1; t++) {
if (t < 1) { continue;}
if (t > static_cast<int>(tLen)) { break;}
if (t < 1) {
continue;
}
if (t > static_cast<int>(tLen)) {
break;
}
int index = rc2index(qLen, k + t - q, nCols);
if (scoreMat[index] < minScore) {
minScoreTPos = t;
......@@ -310,12 +296,12 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
}
}
t = k - (static_cast<int>(qLen) - minScoreTPos);
}
else if (alignType == TargetFit) {
} else if (alignType == TargetFit) {
t = tLen;
int qStart = max(0,min((int)qLen, (int)tLen) - max(0, k - max(((int)tLen) - ((int)qLen), 0)));
int qEnd = min(qLen, tLen + k) + 1;
int qStart = std::max(0, std::min((int)qLen, (int)tLen) -
std::max(0, k - std::max(((int)tLen) - ((int)qLen), 0)));
int qEnd = std::min(qLen, tLen + k) + 1;
minScoreQPos = qStart;
int index = rc2index(minScoreQPos, k - (minScoreQPos - tLen), nCols);
......@@ -336,17 +322,14 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
Arrow arrow;
MatrixLabel curMatrix = Match;
while ((q > 0) or
(q == 0 and t > k)) {
while ((q > 0) or (q == 0 and t > k)) {
assert(t < 2 * k + 1);
if (curMatrix == Match) {
arrow = pathMat[rc2index(q, t, nCols)];
if (arrow == Diagonal) {
optAlignment.push_back(arrow);
q--;
}
else if (arrow == Left) {
} else if (arrow == Left) {
optAlignment.push_back(arrow);
t--;
}
......@@ -357,40 +340,37 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
//
else if (arrow == AffineInsClose) {
curMatrix = AffineIns;
}
else if (arrow == AffineHPInsClose) {
} else if (arrow == AffineHPInsClose) {
curMatrix = AffineHPIns;
}
}
else if (curMatrix == AffineHPIns) {
} else if (curMatrix == AffineHPIns) {
//
// The current
arrow = hpInsPathMat[rc2index(q, t, nCols)];
if (arrow == AffineHPInsOpen) {
curMatrix = Match;
}
else if (arrow != AffineHPInsUp) {
std::cout << "ERROR! Affine homopolymer insertion path matrix MUST only have UP or OPEN arrows." << std::endl;
} else if (arrow != AffineHPInsUp) {
std::cout << "ERROR! Affine homopolymer insertion path matrix MUST only have UP or "
"OPEN arrows."
<< std::endl;
assert(0);
}
optAlignment.push_back(Up);
q--;
t++;
}
else if (curMatrix == AffineIns) {
} else if (curMatrix == AffineIns) {
arrow = insPathMat[rc2index(q, t, nCols)];
if (arrow == AffineInsOpen) {
curMatrix = Match;
}
else if (arrow != AffineInsUp) {
std::cout << "ERROR! Affine insertion path matrix MUST only have UP or OPEN arrows."<<std::endl;
} else if (arrow != AffineInsUp) {
std::cout << "ERROR! Affine insertion path matrix MUST only have UP or OPEN arrows."
<< std::endl;
assert(0);
}
optAlignment.push_back(Up);
q--;
t++;
}
else {
} else {
std::cout << "ERROR in affine local alignment, matrix is: " << curMatrix << std::endl;
assert(0);
}
......@@ -402,5 +382,4 @@ int AffineKBandAlign(T_QuerySequence &pqSeq, T_TargetSequence &ptSeq,
return optScore;
}
#endif // _BLASR_AFFINE_KBAND_ALIGN_HPP_
#ifndef _BLASR_ALIGNMENT_FORMATS_HPP_
#define _BLASR_ALIGNMENT_FORMATS_HPP_
enum AlignmentPrintFormat { StickPrint, SummaryPrint, CompareXML, Vulgar, Interval, CompareSequencesParsable, SAM, BAM, NOFORMAT};
enum AlignmentPrintFormat
{
StickPrint,
SummaryPrint,
CompareXML,
Vulgar,
Interval,
CompareSequencesParsable,
SAM,
BAM,
NOFORMAT
};
#endif
#include <alignment/algorithms/alignment/AlignmentUtils.hpp>
#include <cassert>
#include <vector>
#include <stdint.h>
#include <iostream>
#include <ostream>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include "AlignmentUtils.hpp"
#include <iostream>
#include <ostream>
#include <vector>
using namespace blasr;
int ComputeAlignmentScore(
std::string &queryStr, std::string &textStr,
int matchScores[5][5], int ins, int del) {
int ComputeAlignmentScore(std::string &queryStr, std::string &textStr, int matchScores[5][5],
int ins, int del)
{
DistanceMatrixScoreFunction<DNASequence, DNASequence> scoreFn(matchScores, ins, del);
return ComputeAlignmentScore(queryStr, textStr, scoreFn);
}
int GetNumberWidth(unsigned int value) {
int GetNumberWidth(unsigned int value)
{
// 0 has a width of 1.
int width = 1;
while (value / 10 > 0) {
......@@ -26,7 +28,8 @@ int GetNumberWidth(unsigned int value) {
return width;
}
int ComputeDrift(Block &cur, Block &next) {
int ComputeDrift(Block &cur, Block &next)
{
int tGap = (next.tPos - cur.TEnd());
int qGap = (next.qPos - cur.QEnd());
......@@ -34,7 +37,7 @@ int ComputeDrift(Block &cur, Block &next) {
int commonGap = 0;
if (tGap > 0 and qGap > 0) {
commonGap = abs(tGap - qGap);
commonGap = std::abs(tGap - qGap);
}
tGap -= commonGap;
qGap -= commonGap;
......
......@@ -2,11 +2,13 @@
#define _BLASR_ALIGNMENT_UTILS_HPP_
#include <string>
#include "../../../pbdata/DNASequence.hpp"
#include "../../datastructures/alignment/Alignment.hpp"
#include "DistanceMatrixScoreFunction.hpp"
enum AlignmentType {
#include <alignment/algorithms/alignment/DistanceMatrixScoreFunction.hpp>
#include <alignment/datastructures/alignment/Alignment.hpp>
#include <pbdata/DNASequence.hpp>
enum AlignmentType
{
Local, // Standard Smith-Waterman
Global, // Standard Needleman-Wuncsh
QueryFit, // No gap penalties from query.
......@@ -53,31 +55,21 @@ enum AlignmentType {
SignificanceLimited
};
inline
int ComputeAlignmentScore(
std::string& queryStr, std::string& textStr,
int matchScores[5][5], int ins, int del);
inline int ComputeAlignmentScore(std::string &queryStr, std::string &textStr, int matchScores[5][5],
int ins, int del);
template <typename T_ScoreFn>
inline
int ComputeAlignmentScore(
std::string& queryStr, std::string& textStr,
T_ScoreFn & scoreFn, bool useAffineScore = false);
inline int ComputeAlignmentScore(std::string &queryStr, std::string &textStr, T_ScoreFn &scoreFn,
bool useAffineScore = false);
template <typename T_QuerySequence, typename T_TargetSequence, typename T_ScoreFn>
int ComputeAlignmentScore(blasr::Alignment& alignment,
T_QuerySequence& query,
T_TargetSequence& text,
T_ScoreFn& scoreFn,
int ComputeAlignmentScore(blasr::Alignment &alignment, T_QuerySequence &query,
T_TargetSequence &text, T_ScoreFn &scoreFn,
bool useAffinePenalty = false);
template <typename T_QuerySequence, typename T_TargetSequence>
int ComputeAlignmentScore(blasr::Alignment &alignment,
T_QuerySequence &query,
T_TargetSequence &text,
int matchScores[5][5],
int ins,
int del);
int ComputeAlignmentScore(blasr::Alignment &alignment, T_QuerySequence &query,
T_TargetSequence &text, int matchScores[5][5], int ins, int del);
int GetNumberWidth(unsigned int value);
......@@ -94,27 +86,28 @@ inline int ReadCompSeqAlignment(std::istream &in, T_Alignment &alignment);
* This should be changed to read any type of alignment, since templates are being
* used.
*/
inline void ReadCompSeqAlignments(std::string &compSeqAlignmentFileName, std::vector<blasr::CompSeqAlignment> &alignments);
inline void ReadCompSeqAlignments(std::string &compSeqAlignmentFileName,
std::vector<blasr::CompSeqAlignment> &alignments);
inline void PrintAlignmentStats(blasr::Alignment &alignment, std::ostream &out);
template <typename T_QuerySequence, typename T_TargetSequence>
void AppendGapCharacters(blasr::Gap &gap,
T_QuerySequence &query, T_TargetSequence &text,
DNALength &q, DNALength &t,
char mismatchChar, char gapChar,
void AppendGapCharacters(blasr::Gap &gap, T_QuerySequence &query, T_TargetSequence &text,
DNALength &q, DNALength &t, char mismatchChar, char gapChar,
std::string &textStr, std::string &alignStr, std::string &queryStr);
template <typename T_Alignment, typename T_QuerySequence, typename T_TargetSequence>
void CreateAlignmentStrings(T_Alignment &alignment,
T_QuerySequence &query, T_TargetSequence &text,
std::string &textStr, std::string &alignStr, std::string &queryStr, DNALength queryLength=0, DNALength textLength=0);
void CreateAlignmentStrings(T_Alignment &alignment, T_QuerySequence &query, T_TargetSequence &text,
std::string &textStr, std::string &alignStr, std::string &queryStr,
DNALength queryLength = 0, DNALength textLength = 0);
template <typename T_Alignment, typename T_ScoreFn>
void ComputeAlignmentStats(T_Alignment & alignment, Nucleotide* qSeq, Nucleotide * tSeq, T_ScoreFn & scoreFn, bool useAffineScore = false);
void ComputeAlignmentStats(T_Alignment &alignment, Nucleotide *qSeq, Nucleotide *tSeq,
T_ScoreFn &scoreFn, bool useAffineScore = false);
template <typename T_Alignment>
void ComputeAlignmentStats(T_Alignment &alignment, Nucleotide* qSeq, Nucleotide *tSeq, int matchMatrix[5][5], int ins, int del);
void ComputeAlignmentStats(T_Alignment &alignment, Nucleotide *qSeq, Nucleotide *tSeq,
int matchMatrix[5][5], int ins, int del);
template <typename T_Alignment>
int ComputeDrift(T_Alignment &alignment);
......@@ -131,8 +124,8 @@ void RemoveAlignmentPrefixGaps(T_Alignment &alignment);
// characters inserted. The location of gaps if found using the byteAlignment.
template <typename T>
void QVsToCmpH5QVs(const std::string &fieldName, const std::string &qvs,
const std::vector<unsigned char> &byteAlignment,
bool isTag, std::vector<T> *gappedQVs);
const std::vector<unsigned char> &byteAlignment, bool isTag,
std::vector<T> *gappedQVs);
#include "AlignmentUtilsImpl.hpp"
......
......@@ -6,18 +6,15 @@
using namespace blasr;
template <typename T_QuerySequence, typename T_TargetSequence, typename T_ScoreFn>
int ComputeAlignmentScore(Alignment &alignment,
T_QuerySequence &query,
T_TargetSequence &text,
T_ScoreFn &scoreFn,
bool useAffinePenalty) {
int ComputeAlignmentScore(Alignment &alignment, T_QuerySequence &query, T_TargetSequence &text,
T_ScoreFn &scoreFn, bool useAffinePenalty)
{
VectorIndex b, q, t, l, gi;
int alignmentScore = 0;
for (b = 0; b < alignment.blocks.size(); b++) {
for ((q = alignment.qPos + alignment.blocks[b].qPos,
t = alignment.tPos + alignment.blocks[b].tPos,
l = 0) ;
t = alignment.tPos + alignment.blocks[b].tPos, l = 0);
l < alignment.blocks[b].length; q++, t++, l++) {
alignmentScore += scoreFn.Match(text, t, query, q);
}
......@@ -25,17 +22,16 @@ int ComputeAlignmentScore(Alignment &alignment,
for (gi = 0; gi < alignment.gaps[b + 1].size(); gi++) {
if (alignment.gaps[b + 1][gi].seq == Gap::Target) {
if (useAffinePenalty) {
alignmentScore += scoreFn.affineOpen + alignment.gaps[b+1][gi].length * scoreFn.affineExtend;
}
else {
alignmentScore += scoreFn.affineOpen +
alignment.gaps[b + 1][gi].length * scoreFn.affineExtend;
} else {
alignmentScore += alignment.gaps[b + 1][gi].length * scoreFn.ins;
}
}
else {
} else {
if (useAffinePenalty) {
alignmentScore += scoreFn.affineOpen + alignment.gaps[b+1][gi].length * scoreFn.affineExtend;
}
else {
alignmentScore += scoreFn.affineOpen +
alignment.gaps[b + 1][gi].length * scoreFn.affineExtend;
} else {
alignmentScore += alignment.gaps[b + 1][gi].length * scoreFn.del;
}
}
......@@ -46,33 +42,29 @@ int ComputeAlignmentScore(Alignment &alignment,
}
template <typename T_QuerySequence, typename T_TargetSequence>
int ComputeAlignmentScore(blasr::Alignment &alignment,
T_QuerySequence &query,
T_TargetSequence &text,
int matchScores[5][5],
int ins,
int del) {
int ComputeAlignmentScore(blasr::Alignment &alignment, T_QuerySequence &query,
T_TargetSequence &text, int matchScores[5][5], int ins, int del)
{
DistanceMatrixScoreFunction<DNASequence, DNASequence> scoreFn(matchScores, ins, del);
return ComputeAlignmentScore(alignment, query, text, scoreFn);
}
template <typename T_ScoreFn>
int ComputeAlignmentScore(std::string &queryStr, std::string &textStr, T_ScoreFn &scoreFn,
bool useAffineScore) {
bool useAffineScore)
{
if (queryStr.size() != textStr.size()) {
std::cout << "Computing alignment score using invalid alignment string." << std::endl;
std::cout << "Bailing out." << std::endl;
exit(1);
std::exit(EXIT_FAILURE);
}
VectorIndex i;
int score = 0;
VectorIndex alignStrLen = queryStr.size();
for (i = 0; i < alignStrLen; i++) {
if (queryStr[i] != '-' and
textStr[i] != '-') {
if (queryStr[i] != '-' and textStr[i] != '-') {
score += scoreFn.scoreMatrix[ThreeBit[(int)queryStr[i]]][ThreeBit[(int)textStr[i]]];
}
else {
} else {
if (useAffineScore) {
//
// Compute affine gap scoring. For now this uses symmetric insertion/deletion penalties.
......@@ -89,18 +81,15 @@ int ComputeAlignmentScore(std::string &queryStr, std::string &textStr, T_ScoreFn
// will be at the end of the gap.
//
i = gapEnd - 1;
}
else {
} else {
//
// Use non-affine gap scoring.
//
if (queryStr[i] == '-' and textStr[i] != '-') {
score += scoreFn.del;
}
else if (queryStr[i] != '-' and textStr[i] == '-') {
} else if (queryStr[i] != '-' and textStr[i] == '-') {
score += scoreFn.ins;
}
else {
} else {
score += scoreFn.scoreMatrix[4][4];
}
}
......@@ -113,7 +102,9 @@ int ComputeAlignmentScore(std::string &queryStr, std::string &textStr, T_ScoreFn
* This should be changed to read any type of alignment, since templates are being
* used.
*/
inline void ReadCompSeqAlignments(std::string &compSeqAlignmentFileName, std::vector<CompSeqAlignment> &alignments) {
inline void ReadCompSeqAlignments(std::string &compSeqAlignmentFileName,
std::vector<CompSeqAlignment> &alignments)
{
std::ifstream in;
CrucialOpen(compSeqAlignmentFileName, in);
CompSeqAlignment alignment;
......@@ -122,7 +113,8 @@ inline void ReadCompSeqAlignments(std::string &compSeqAlignmentFileName, std::ve
}
}
inline void PrintAlignmentStats(Alignment &alignment, std::ostream &out) {
inline void PrintAlignmentStats(Alignment &alignment, std::ostream &out)
{
out << " nMatch: " << alignment.nMatch << std::endl;
out << " nMisMatch: " << alignment.nMismatch << std::endl;
out << " nIns: " << alignment.nIns << std::endl;
......@@ -132,19 +124,17 @@ inline void PrintAlignmentStats(Alignment &alignment, std::ostream &out) {
}
template <typename T_Alignment>
inline void PrintCompareSequencesAlignmentStats(T_Alignment &alignment, std::ostream &out) {
inline void PrintCompareSequencesAlignmentStats(T_Alignment &alignment, std::ostream &out)
{
int lastBlock;
lastBlock = alignment.blocks.size() - 1;
int qLength, tLength;
if (lastBlock >= 0) {
qLength = (alignment.blocks[lastBlock].qPos
+ alignment.blocks[lastBlock].length) ;
tLength = (alignment.blocks[lastBlock].tPos
+ alignment.blocks[lastBlock].length);
}
else {
qLength = (alignment.blocks[lastBlock].qPos + alignment.blocks[lastBlock].length);
tLength = (alignment.blocks[lastBlock].tPos + alignment.blocks[lastBlock].length);
} else {
qLength = tLength = 0;
}
......@@ -154,53 +144,41 @@ inline void PrintCompareSequencesAlignmentStats(T_Alignment &alignment, std::ost
int alignmentQStart;
if (lastBlock >= 0) {
alignmentQStart = alignment.qPos + alignment.qAlignedSeqPos;
}
else {
} else {
alignmentQStart = alignment.qAlignedSeqPos;
}
out << alignment.qName
<< " " << alignment.qLength
<< " " << alignmentQStart
<< " " << alignmentQStart + qLength;
out << alignment.qName << " " << alignment.qLength << " " << alignmentQStart << " "
<< alignmentQStart + qLength;
if (alignment.qStrand == 0) {
out << " + ";
}
else {
} else {
out << " - ";
}
int alignmentTStart;
if (lastBlock >= 0) {
alignmentTStart = alignment.tPos + alignment.tAlignedSeqPos;
}
else {
} else {
alignmentTStart = 0;
}
out << " " << alignment.tName
<< " " << alignment.tLength;
out << " " << alignment.tName << " " << alignment.tLength;
if (alignment.tStrand == 0) {
out << " " << alignmentTStart
<< " " << alignmentTStart + tLength;
out << " " << alignmentTStart << " " << alignmentTStart + tLength;
out << " + ";
}
else {
out << " " << alignment.tLength - (alignmentTStart + tLength)
<< " " << alignment.tLength - (alignmentTStart);
} else {
out << " " << alignment.tLength - (alignmentTStart + tLength) << " "
<< alignment.tLength - (alignmentTStart);
out << " - ";
}
out << alignment.score
<< " " << alignment.nMatch
<< " " << alignment.nMismatch
<< " " << alignment.nIns
<< " " << alignment.nDel
<< " " << (int) alignment.mapQV
<< " ";
out << alignment.score << " " << alignment.nMatch << " " << alignment.nMismatch << " "
<< alignment.nIns << " " << alignment.nDel << " " << (int)alignment.mapQV << " ";
}
template <typename T_Alignment>
inline int ReadCompareSequencesAlignmentStats(std::istream &in, T_Alignment &alignment) {
inline int ReadCompareSequencesAlignmentStats(std::istream &in, T_Alignment &alignment)
{
int qEnd, tEnd;
int qLength, tLength;
char qStrand, tStrand;
......@@ -222,9 +200,9 @@ inline int ReadCompareSequencesAlignmentStats(std::istream &in, T_Alignment &ali
return 1;
}
template <typename T_Alignment>
inline int ReadCompSeqAlignment(std::istream &in, T_Alignment &alignment) {
inline int ReadCompSeqAlignment(std::istream &in, T_Alignment &alignment)
{
if (!ReadCompareSequencesAlignmentStats(in, alignment)) return 0;
std::string alignStr;
if (!(in >> alignment.qString)) return 0;
......@@ -235,13 +213,11 @@ inline int ReadCompSeqAlignment(std::istream &in, T_Alignment &alignment) {
return 1;
}
template <typename T_QuerySequence, typename T_TargetSequence>
void AppendGapCharacters(Gap &gap,
T_QuerySequence &query, T_TargetSequence &text,
DNALength &q, DNALength &t,
char mismatchChar, char gapChar,
std::string &textStr, std::string &alignStr, std::string &queryStr) {
void AppendGapCharacters(Gap &gap, T_QuerySequence &query, T_TargetSequence &text, DNALength &q,
DNALength &t, char mismatchChar, char gapChar, std::string &textStr,
std::string &alignStr, std::string &queryStr)
{
int gp;
for (gp = 0; gp < gap.length; gp++) {
if (gap.seq == Gap::Query) {
......@@ -249,8 +225,7 @@ void AppendGapCharacters(Gap &gap,
alignStr.push_back(mismatchChar);
queryStr.push_back(gapChar);
t++;
}
else if (gap.seq == Gap::Target) {
} else if (gap.seq == Gap::Target) {
textStr.push_back(gapChar);
alignStr.push_back(mismatchChar);
queryStr.push_back(query[q]);
......@@ -260,10 +235,19 @@ void AppendGapCharacters(Gap &gap,
}
template <typename T_Alignment, typename T_QuerySequence, typename T_TargetSequence>
void CreateAlignmentStrings(T_Alignment &alignment,
T_QuerySequence &query, T_TargetSequence &text,
void CreateAlignmentStrings(T_Alignment &alignment, T_QuerySequence &query, T_TargetSequence &text,
std::string &textStr, std::string &alignStr, std::string &queryStr,
DNALength queryLength, DNALength textLength) {
DNALength
#ifndef NDEBUG
queryLength
#endif
,
DNALength
#ifndef NDEBUG
textLength
#endif
)
{
DNALength q = alignment.qPos;
DNALength t = alignment.tPos;
DNALength qPos, tPos;
......@@ -283,8 +267,7 @@ void CreateAlignmentStrings(T_Alignment &alignment,
//
// If there is no gap list, add the gaps as an offset here.
//
if (alignment.blocks[0].qPos > 0 or
alignment.blocks[0].tPos > 0) {
if (alignment.blocks[0].qPos > 0 or alignment.blocks[0].tPos > 0) {
// commonGapLen should be the shorter gap.
qPos = alignment.blocks[0].qPos;
tPos = alignment.blocks[0].tPos;
......@@ -331,7 +314,8 @@ void CreateAlignmentStrings(T_Alignment &alignment,
// The first gap is before the first block of characters.
DNALength gi;
for (gi = 0; gi < alignment.gaps[0].size(); gi++) {
AppendGapCharacters(alignment.gaps[0][gi], query, text, q, t, mismatchChar, gapChar, textStr, alignStr, queryStr);
AppendGapCharacters(alignment.gaps[0][gi], query, text, q, t, mismatchChar, gapChar,
textStr, alignStr, queryStr);
}
}
......@@ -341,8 +325,7 @@ void CreateAlignmentStrings(T_Alignment &alignment,
textStr.push_back(text[t]);
assert(queryLength == 0 or q < queryLength);
assert(textLength == 0 or t < textLength);
if (TwoBit[query[q]] !=
TwoBit[text[t]])
if (TwoBit[query[q]] != TwoBit[text[t]])
alignStr.push_back(mismatchChar);
else
alignStr.push_back(matchChar);
......@@ -353,23 +336,21 @@ void CreateAlignmentStrings(T_Alignment &alignment,
// There are no gaps to count after the last block, so
// don't add the gapped characters for this.
//
if (alignment.blocks.size() == 0)
continue;
if (alignment.blocks.size() == 0) continue;
if (b == alignment.blocks.size() - 1) {
continue;
}
if (alignment.gaps.size() > 0) {
for (gi = 0; gi < alignment.gaps[b + 1].size(); gi++) {
AppendGapCharacters(alignment.gaps[b+1][gi], query, text, q, t, gapSeparationChar, gapChar, textStr, alignStr, queryStr);
AppendGapCharacters(alignment.gaps[b + 1][gi], query, text, q, t, gapSeparationChar,
gapChar, textStr, alignStr, queryStr);
}
}
else {
} else {
DNALength queryGapLen = (alignment.blocks[b+1].qPos -
alignment.blocks[b].qPos - alignment.blocks[b].length);
DNALength textGapLen = (alignment.blocks[b+1].tPos -
alignment.blocks[b].tPos - alignment.blocks[b].length);
DNALength queryGapLen = (alignment.blocks[b + 1].qPos - alignment.blocks[b].qPos -
alignment.blocks[b].length);
DNALength textGapLen = (alignment.blocks[b + 1].tPos - alignment.blocks[b].tPos -
alignment.blocks[b].length);
if (queryGapLen > 0 or textGapLen > 0) {
// commonGapLen should be the shorter gap.
......@@ -389,7 +370,6 @@ void CreateAlignmentStrings(T_Alignment &alignment,
textStr.push_back(text[t]);
alignStr.push_back(gapSeparationChar);
queryStr.push_back(gapChar);
}
for (g = 0; g < commonGapLen; g++) {
......@@ -404,9 +384,10 @@ void CreateAlignmentStrings(T_Alignment &alignment,
}
}
template <typename T_Alignment, typename T_ScoreFn>
void ComputeAlignmentStats(T_Alignment & alignment, Nucleotide* qSeq, Nucleotide * tSeq, T_ScoreFn & scoreFn, bool useAffineScore) {
void ComputeAlignmentStats(T_Alignment &alignment, Nucleotide *qSeq, Nucleotide *tSeq,
T_ScoreFn &scoreFn, bool useAffineScore)
{
(void)(useAffineScore);
int qp = 0, tp = 0;
int nMatch = 0, nMismatch = 0, nIns = 0, nDel = 0;
......@@ -422,18 +403,15 @@ void ComputeAlignmentStats(T_Alignment & alignment, Nucleotide* qSeq, Nucleotide
int qi = (int)queryStr[i];
if (ThreeBit[ti] == ThreeBit[qi]) {
nMatch++;
}
else {
} else {
nMismatch++;
}
tp++;
qp++;
}
else if (textStr[i] == '-' and queryStr[i] != '-') {
} else if (textStr[i] == '-' and queryStr[i] != '-') {
nIns++;
qp++;
}
else if (queryStr[i] == '-' and textStr[i] != '-') {
} else if (queryStr[i] == '-' and textStr[i] != '-') {
nDel++;
tp++;
}
......@@ -441,12 +419,10 @@ void ComputeAlignmentStats(T_Alignment & alignment, Nucleotide* qSeq, Nucleotide
if (tp + qp > 0) {
if (textStr.size() + queryStr.size() > 0) {
pctSimilarity = (nMatch * 2.0) / (textStr.size() + queryStr.size()) * 100;
}
else {
} else {
pctSimilarity = 0;
}
}
else {
} else {
pctSimilarity = 0;
}
......@@ -459,32 +435,32 @@ void ComputeAlignmentStats(T_Alignment & alignment, Nucleotide* qSeq, Nucleotide
}
template <typename T_Alignment>
void ComputeAlignmentStats(T_Alignment &alignment, Nucleotide* qSeq, Nucleotide *tSeq, int matchMatrix[5][5], int ins, int del) {
void ComputeAlignmentStats(T_Alignment &alignment, Nucleotide *qSeq, Nucleotide *tSeq,
int matchMatrix[5][5], int ins, int del)
{
DistanceMatrixScoreFunction<DNASequence, DNASequence> scoreFn(matchMatrix, ins, del);
ComputeAlignmentStats(alignment, qSeq, tSeq, scoreFn);
}
template <typename T_Alignment>
int ComputeDrift(T_Alignment &alignment) {
int ComputeDrift(T_Alignment &alignment)
{
VectorIndex b;
int drift = 0;
int maxDrift = 0;
int driftBetweenBlocks;
if (alignment.blocks.size() == 0)
return 0;
if (alignment.blocks.size() == 0) return 0;
for (b = 0; b < alignment.blocks.size() - 1; b++) {
driftBetweenBlocks = ComputeDrift(alignment.blocks[b], alignment.blocks[b + 1]);
drift += driftBetweenBlocks;
if (abs(drift) > maxDrift)
maxDrift = abs(drift);
if (std::abs(drift) > maxDrift) maxDrift = std::abs(drift);
}
return maxDrift;
}
template <typename T_Alignment>
void RemoveAlignmentPrefixGaps(T_Alignment &alignment) {
void RemoveAlignmentPrefixGaps(T_Alignment &alignment)
{
if (alignment.gaps.size() == 0) {
return;
}
......@@ -494,8 +470,7 @@ void RemoveAlignmentPrefixGaps(T_Alignment &alignment) {
for (g = 0; g < alignment.gaps[0].size(); g++) {
if (alignment.gaps[0][g].seq == Gap::Target) {
qStart += alignment.gaps[0][g].length;
}
else if (alignment.gaps[0][g].seq == Gap::Query) {
} else if (alignment.gaps[0][g].seq == Gap::Query) {
tStart += alignment.gaps[0][g].length;
}
}
......@@ -511,10 +486,9 @@ void RemoveAlignmentPrefixGaps(T_Alignment &alignment) {
}
template <typename T>
void QVsToCmpH5QVs(const std::string &qvs,
const std::vector<unsigned char> &byteAlignment,
bool isTag,
std::vector<T> *gappedQVs) {
void QVsToCmpH5QVs(const std::string &qvs, const std::vector<unsigned char> &byteAlignment,
bool isTag, std::vector<T> *gappedQVs)
{
gappedQVs->clear();
unsigned int qv_i = 0;
......
#include "BaseScoreFunction.hpp"
#include <alignment/algorithms/alignment/BaseScoreFunction.hpp>
BaseScoreFunction::BaseScoreFunction(int insP, int delP, int subPriorP,
int delPriorP, int affineExtensionP, int affineOpenP) {
BaseScoreFunction::BaseScoreFunction(int insP, int delP, int subPriorP, int delPriorP,
int affineExtensionP, int affineOpenP)
{
ins = insP;
del = delP;
substitutionPrior = subPriorP;
......
#ifndef _BLASR_BASE_SCORE_FUNCTION_HPP_
#define _BLASR_BASE_SCORE_FUNCTION_HPP_
class BaseScoreFunction {
class BaseScoreFunction
{
public:
int ins;
int del;
......@@ -10,8 +11,8 @@ class BaseScoreFunction {
int affineExtend;
int affineOpen;
BaseScoreFunction(int insP = 0, int delP = 0, int subPriorP = 0,
int delPriorP = 0, int affineExtensionP = 0, int affineOpenP = 0);
BaseScoreFunction(int insP = 0, int delP = 0, int subPriorP = 0, int delPriorP = 0,
int affineExtensionP = 0, int affineOpenP = 0);
};
#endif // _BLASR_BASE_SCORE_FUNCTION_HPP_`
#ifndef _BLASR_DISTANCE_MATRIX_SCORE_FUNCTION_HPP_
#define _BLASR_DISTANCE_MATRIX_SCORE_FUNCTION_HPP_
#include "BaseScoreFunction.hpp"
#include "ScoreMatrices.hpp"
#include <pbdata/Types.h>
#include <alignment/algorithms/alignment/BaseScoreFunction.hpp>
#include <alignment/algorithms/alignment/ScoreMatrices.hpp>
template <typename T_RefSequence, typename T_QuerySequence>
class DistanceMatrixScoreFunction : public BaseScoreFunction {
class DistanceMatrixScoreFunction : public BaseScoreFunction
{
public:
int scoreMatrix[5][5];
DistanceMatrixScoreFunction();
......@@ -29,10 +31,12 @@ public:
int Insertion(T_QuerySequence &seq, DNALength pos);
float NormalizedMatch(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos);
float NormalizedInsertion(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos);
float NormalizedDeletion(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos);
float NormalizedMatch(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query,
DNALength queryPos);
float NormalizedInsertion(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query,
DNALength queryPos);
float NormalizedDeletion(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query,
DNALength queryPos);
};
#include "DistanceMatrixScoreFunctionImpl.hpp"
......
#ifndef _BLASR_DISTANCE_MATRIX_SCORE_FUNCTION_IMPL_HPP_
#define _BLASR_DISTANCE_MATRIX_SCORE_FUNCTION_IMPL_HPP_
#include <string>
#include <iostream>
#include <stdint.h>
#include <string>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <ostream>
// pbdata
#include "../../../pbdata/Types.h"
#include "../../../pbdata/NucConversion.hpp"
#include "../../../pbdata/Enumerations.h"
#include "../../../pbdata/DNASequence.hpp"
#include "../../../pbdata/FASTASequence.hpp"
#include "../../../pbdata/FASTQSequence.hpp"
#include <string>
#include <string>
#include "DistanceMatrixScoreFunction.hpp"
#include <pbdata/Enumerations.h>
#include <pbdata/Types.h>
#include <pbdata/DNASequence.hpp>
#include <pbdata/FASTASequence.hpp>
#include <pbdata/FASTQSequence.hpp>
#include <pbdata/NucConversion.hpp>
template <typename T_RefSequence, typename T_QuerySequence>
void DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::InitializeScoreMatrix(int scoreMatrixP[5][5]) {
void DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::InitializeScoreMatrix(
int scoreMatrixP[5][5])
{
int i, j;
for (i = 0; i < 5; i++) {
for (j = 0; j < 5; j++) {
......@@ -27,44 +28,60 @@ void DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::InitializeScore
}
template <typename T_RefSequence, typename T_QuerySequence>
DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::DistanceMatrixScoreFunction() : BaseScoreFunction() {
DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::DistanceMatrixScoreFunction()
: BaseScoreFunction()
{
}
template <typename T_RefSequence, typename T_QuerySequence>
DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::DistanceMatrixScoreFunction(int scoreMatrixP[5][5], int insertionP, int deletionP) : BaseScoreFunction() {
DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::DistanceMatrixScoreFunction(
int scoreMatrixP[5][5], int insertionP, int deletionP)
: BaseScoreFunction()
{
InitializeScoreMatrix(scoreMatrixP);
ins = insertionP;
del = deletionP;
}
template <typename T_RefSequence, typename T_QuerySequence>
int DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::Deletion(
T_RefSequence &seq, DNALength pos, T_QuerySequence &querySeq,
DNALength queryPos) {
(void)(seq); (void)(pos); (void)(querySeq); (void)(queryPos);
int DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::Deletion(T_RefSequence &seq,
DNALength pos,
T_QuerySequence &querySeq,
DNALength queryPos)
{
(void)(seq);
(void)(pos);
(void)(querySeq);
(void)(queryPos);
return del;
}
template <typename T_RefSequence, typename T_QuerySequence>
int DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::Insertion(
T_RefSequence &seq, DNALength pos, T_QuerySequence &querySeq,
DNALength queryPos) {
(void)(seq); (void)(pos); (void)(querySeq); (void)(queryPos);
T_RefSequence &seq, DNALength pos, T_QuerySequence &querySeq, DNALength queryPos)
{
(void)(seq);
(void)(pos);
(void)(querySeq);
(void)(queryPos);
return ins;
}
template <typename T_RefSequence, typename T_QuerySequence>
int DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::Deletion(
T_RefSequence &seq, DNALength pos) {
(void)(seq); (void)(pos);
int DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::Deletion(T_RefSequence &seq,
DNALength pos)
{
(void)(seq);
(void)(pos);
return del;
}
template <typename T_RefSequence, typename T_QuerySequence>
int DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::Match(
T_RefSequence &ref, DNALength refPos, T_QuerySequence &query,
DNALength queryPos) {
int DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::Match(T_RefSequence &ref,
DNALength refPos,
T_QuerySequence &query,
DNALength queryPos)
{
return scoreMatrix[ThreeBit[ref[refPos]]][ThreeBit[query[queryPos]]];
}
......@@ -72,40 +89,52 @@ int DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::Match(
// Define the score function on dereferenced pointers for speed.
//
template <typename T_RefSequence, typename T_QuerySequence>
int DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::Match(
Nucleotide ref, Nucleotide query) {
int DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::Match(Nucleotide ref,
Nucleotide query)
{
return scoreMatrix[ThreeBit[ref]][ThreeBit[query]];
}
template <typename T_RefSequence, typename T_QuerySequence>
int DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::Insertion(
T_QuerySequence &seq, DNALength pos) {
(void)(seq); (void)(pos);
int DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::Insertion(T_QuerySequence &seq,
DNALength pos)
{
(void)(seq);
(void)(pos);
return ins;
}
template <typename T_RefSequence, typename T_QuerySequence>
float
DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>
::NormalizedMatch(T_RefSequence &ref, DNALength refPos,
T_QuerySequence &query, DNALength queryPos) {
(void)(ref); (void)(refPos); (void)(query); (void)(queryPos);
float DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::NormalizedMatch(
T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos)
{
(void)(ref);
(void)(refPos);
(void)(query);
(void)(queryPos);
return 0;
}
template <typename T_RefSequence, typename T_QuerySequence>
float DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>
::NormalizedInsertion(T_RefSequence &ref, DNALength refPos,
T_QuerySequence &query, DNALength queryPos) {
(void)(ref); (void)(refPos); (void)(query); (void)(queryPos);
float DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::NormalizedInsertion(
T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos)
{
(void)(ref);
(void)(refPos);
(void)(query);
(void)(queryPos);
return 0;
}
template <typename T_RefSequence, typename T_QuerySequence>
float DistanceMatrixScoreFunction<T_RefSequence,T_QuerySequence>::NormalizedDeletion(T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos) {
(void)(ref); (void)(refPos); (void)(query); (void)(queryPos);
float DistanceMatrixScoreFunction<T_RefSequence, T_QuerySequence>::NormalizedDeletion(
T_RefSequence &ref, DNALength refPos, T_QuerySequence &query, DNALength queryPos)
{
(void)(ref);
(void)(refPos);
(void)(query);
(void)(queryPos);
return 0;
}
#endif // _BLASR_DISTANCE_MATRIX_SCORE_FUNCTION_IMPL_HPP_
#include "ExtendAlign.hpp"
#include <alignment/algorithms/alignment/ExtendAlign.hpp>
RCToIndex::RCToIndex() {
qStart = 0; tStart = 0;
RCToIndex::RCToIndex()
{
qStart = 0;
tStart = 0;
band = middleCol = nCols = 0;
}
int RCToIndex::operator()(int r, int c, int &index) {
int RCToIndex::operator()(int r, int c, int &index)
{
//
// First do some error checking on the row and column to see if it
// is within the band.
//
if (r < qStart) { return 0; }
if (c < tStart) { return 0; }
if (r < qStart) {
return 0;
}
if (c < tStart) {
return 0;
}
r -= qStart;
c -= tStart;
if (std::abs(r-c) > band) { return 0; } // outside band range.
if (c < 0) { return 0; }
if (middleCol - (r - c) >= nCols) { return 0; }
index = (r*nCols) + (middleCol - (r - c));
return 1;
if (std::abs(r - c) > band) {
return 0;
} // outside band range.
if (c < 0) {
return 0;
}
int BaseIndex::QNotAtSeqBoundary(int q) {
return q != queryAlignLength;
}
int BaseIndex::TNotAtSeqBoundary(int t) {
return t != refAlignLength;
if (middleCol - (r - c) >= nCols) {
return 0;
}
int BaseIndex::QAlignLength() {
return queryAlignLength;
index = (r * nCols) + (middleCol - (r - c));
return 1;
}
int BaseIndex::TAlignLength() {
return refAlignLength;
}
int BaseIndex::QNotAtSeqBoundary(int q) { return q != queryAlignLength; }
int BaseIndex::TNotAtSeqBoundary(int t) { return t != refAlignLength; }
int BaseIndex::QAlignLength() { return queryAlignLength; }
int BaseIndex::TAlignLength() { return refAlignLength; }
int ForwardIndex::QuerySeqPos(int q) {
return queryPos + q;
}
int ForwardIndex::QuerySeqPos(int q) { return queryPos + q; }
int ForwardIndex::RefSeqPos(int t) {
return refPos + t;
}
int ForwardIndex::RefSeqPos(int t) { return refPos + t; }
int ForwardIndex::GetQueryStartPos(int startQ, int endQ) {
int ForwardIndex::GetQueryStartPos(int startQ, int endQ)
{
(void)(endQ);
return queryPos + startQ + 1;
}
int ForwardIndex::GetRefStartPos(int startT, int endT) {
int ForwardIndex::GetRefStartPos(int startT, int endT)
{
(void)(endT);
return refPos + startT + 1;
}
void ForwardIndex::OrderArrowVector(std::vector<Arrow> &mat) {
reverse(mat.begin(), mat.end());
}
void ForwardIndex::OrderArrowVector(std::vector<Arrow> &mat) { reverse(mat.begin(), mat.end()); }
int ReverseIndex::QuerySeqPos(int q) {
return queryPos - q;
}
int ReverseIndex::QuerySeqPos(int q) { return queryPos - q; }
int ReverseIndex::RefSeqPos(int t) {
return refPos - t;
}
int ReverseIndex::RefSeqPos(int t) { return refPos - t; }
int ReverseIndex::GetQueryStartPos(int startQ, int endQ) {
int ReverseIndex::GetQueryStartPos(int startQ, int endQ)
{
(void)(startQ);
return queryPos - (endQ - 1);
}
int ReverseIndex::GetRefStartPos(int startT, int endT) {
int ReverseIndex::GetRefStartPos(int startT, int endT)
{
(void)(startT);
return refPos - (endT - 1);
}
void ReverseIndex::OrderArrowVector(std::vector<Arrow> &mat) {
(void)(mat);
}
void ReverseIndex::OrderArrowVector(std::vector<Arrow> &mat) { (void)(mat); }
#ifndef _BLASR_EXTEND_ALIGN_HPP_
#define _BLASR_EXTEND_ALIGN_HPP_
#include <algorithm>
#include <iosfwd>
#include <vector>
#include <algorithm>
// pbdata
#include "../../../pbdata/defs.h"
#include "../../../pbdata/NucConversion.hpp"
#include "../../../pbdata/matrix/FlatMatrix.hpp"
#include "../../datastructures/alignment/Alignment.hpp"
#include "KBandAlign.hpp"
#include <pbdata/defs.h>
#include <alignment/algorithms/alignment/KBandAlign.hpp>
#include <alignment/datastructures/alignment/Alignment.hpp>
#include <pbdata/NucConversion.hpp>
#include <pbdata/matrix/FlatMatrix.hpp>
//FIXME: change data type of target pos from int to GenomeLength
// in order to support > 4G genome.
......@@ -21,7 +21,8 @@
// No need to change data type of query pos to DNALength,
// since it's unlikely to have > 2G bases per zmw.
class RCToIndex {
class RCToIndex
{
public:
int qStart, tStart;
int middleCol;
......@@ -33,7 +34,8 @@ public:
int operator()(int r, int c, int &index);
};
class BaseIndex {
class BaseIndex
{
public:
int queryPos, refPos;
int queryAlignLength, refAlignLength;
......@@ -47,9 +49,9 @@ public:
int TAlignLength();
};
class ForwardIndex : public BaseIndex {
class ForwardIndex : public BaseIndex
{
public:
int QuerySeqPos(int q);
int RefSeqPos(int t);
......@@ -61,10 +63,9 @@ public:
void OrderArrowVector(std::vector<Arrow> &mat);
};
class ReverseIndex : public BaseIndex {
class ReverseIndex : public BaseIndex
{
public:
int QuerySeqPos(int q);
int RefSeqPos(int t);
......@@ -76,19 +77,11 @@ public:
void OrderArrowVector(std::vector<Arrow> &mat);
};
template<typename T_Alignment,
typename T_ScoreFn,
typename T_QuerySeq,
typename T_RefSeq,
template <typename T_Alignment, typename T_ScoreFn, typename T_QuerySeq, typename T_RefSeq,
typename T_Index>
int ExtendAlignment(T_QuerySeq &querySeq, int queryPos,
T_RefSeq &refSeq, int refPos,
int k,
std::vector<int> &scoreMat,
std::vector<Arrow> &pathMat,
T_Alignment &alignment,
T_ScoreFn &scoreFn,
T_Index &index,
int ExtendAlignment(T_QuerySeq &querySeq, int queryPos, T_RefSeq &refSeq, int refPos, int k,
std::vector<int> &scoreMat, std::vector<Arrow> &pathMat, T_Alignment &alignment,
T_ScoreFn &scoreFn, T_Index &index,
int minExtendNBases = 1, // Require that there
// are more than one
// base to align.
......@@ -101,7 +94,8 @@ template<typename T_Alignment,
// that one may have before
// terminating the alignment
//
) {
)
{
PB_UNUSED(queryPos);
PB_UNUSED(refPos);
//
......@@ -122,8 +116,7 @@ template<typename T_Alignment,
rcToIndex.nCols = nCols;
rcToIndex.middleCol = k + 2 - 1;
if (index.queryAlignLength < minExtendNBases or
index.refAlignLength < minExtendNBases) {
if (index.queryAlignLength < minExtendNBases or index.refAlignLength < minExtendNBases) {
//
// One of the sequences isn't long enough to even try to extend,
// just bail with an empty alignment.
......@@ -149,8 +142,8 @@ template<typename T_Alignment,
int t;
// Initialize first column for insertions.
int firstIndex;
fill(scoreMat.begin(), scoreMat.begin() + matSize, 0);
fill(pathMat.begin(), pathMat.begin() + matSize, NoArrow);
std::fill(scoreMat.begin(), scoreMat.begin() + matSize, 0);
std::fill(pathMat.begin(), pathMat.begin() + matSize, NoArrow);
rcToIndex(0, 0, firstIndex);
scoreMat[firstIndex] = 0;
pathMat[firstIndex] = NoArrow;
......@@ -166,10 +159,9 @@ template<typename T_Alignment,
int qSeqPos = index.QuerySeqPos(q - 1);
scoreMat[i] = scoreMat[pi] + scoreFn.Insertion(querySeq, qSeqPos);
pathMat[i] = Up;
// cout << "initializing insertion gap penalty for " << q << " " << refPos-1 << " " << i << " " << scoreMat[i] << endl;
// std::cout << "initializing insertion gap penalty for " << q << " " << refPos-1 << " " << i << " " << scoreMat[i] << std::endl;
}
// Initialize the first row for deletions.
q = 0;
......@@ -182,12 +174,12 @@ template<typename T_Alignment,
int qSeqPos = index.QuerySeqPos(0);
scoreMat[i] = scoreMat[previ] + scoreFn.Deletion(querySeq, qSeqPos);
pathMat[i] = Left;
// cout << "initializing deletion gap penalty for " << ((int)queryPos)-1 << " " << t << " " << i << " " << scoreMat[i] << endl;
// std::cout << "initializing deletion gap penalty for " << ((int)queryPos)-1 << " " << t << " " << i << " " << scoreMat[i] << std::endl;
}
/* PrintFlatMatrix(&scoreMat[0], k , nCols, cout);
cout << endl;
PrintFlatMatrix(&pathMat[0], k, nCols, cout);
cout << endl;
/* PrintFlatMatrix(&scoreMat[0], k , nCols, std::cout);
std::cout << std::endl;
PrintFlatMatrix(&pathMat[0], k, nCols, std::cout);
std::cout << std::endl;
*/
int nDrops = 0;
int prevRowMinScore = INF_INT;
......@@ -199,9 +191,7 @@ template<typename T_Alignment,
int maxAlignLength = std::min(index.QAlignLength(), index.TAlignLength()) + maxNDrops;
for (q = 1; (index.QNotAtSeqBoundary(q-1) and
nDrops < maxNDrops and
q < maxAlignLength);
for (q = 1; (index.QNotAtSeqBoundary(q - 1) and nDrops < maxNDrops and q < maxAlignLength);
q++) {
//
......@@ -225,7 +215,8 @@ template<typename T_Alignment,
for (t = tStart; t < std::min(tEnd, maxAlignLength); t++) {
int insIndex, delIndex, matchIndex;
bool hasInsIndex = false, hasDelIndex = false, hasMatchIndex = false, hasCurIndex = false;
bool hasInsIndex = false, hasDelIndex = false, hasMatchIndex = false,
hasCurIndex = false;
hasCurIndex = rcToIndex(q, t, curIndex);
assert(hasCurIndex);
......@@ -238,51 +229,62 @@ template<typename T_Alignment,
delScore = INF_INT;
insScore = INF_INT;
matchScore = INF_INT;
// cout << "ins index: " << insIndex << " del: " << delIndex << " match index " << matchIndex << endl;
// std::cout << "ins index: " << insIndex << " del: " << delIndex << " match index " << matchIndex << std::endl;
qSeqPos = index.QuerySeqPos(q - 1); // The offset is to allow for the boundary buffer.
tSeqPos = index.RefSeqPos(t - 1); // ditto.
/* if (scoreMat[insIndex] == -1) {
cout << "bleh" << endl;
std::cout << "bleh" << std::endl;
}
if (scoreMat[matchIndex] == -1) {
cout << "bleh" << endl;
std::cout << "bleh" << std::endl;
}
if (scoreMat[delIndex] == -1) {
cout << "bleh" << endl;
std::cout << "bleh" << std::endl;
}
if (scoreFn.Insertion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos) == -1) {
cout << "bleh" << endl;
std::cout << "bleh" << std::endl;
}
if (scoreFn.Deletion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos) == -1) {
cout << "ugh" << endl;
std::cout << "ugh" << std::endl;
}
if ( scoreFn.Match(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos) == -1) {
cout <<" gah" << endl;
std::cout <<" gah" << std::endl;
}*/
if (hasInsIndex) {
insScore = scoreMat[insIndex] + scoreFn.Insertion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos);
insScore =
scoreMat[insIndex] +
scoreFn.Insertion(refSeq, (DNALength)tSeqPos, querySeq, (DNALength)qSeqPos);
}
if (hasDelIndex) {
delScore = scoreMat[delIndex] + scoreFn.Deletion(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos);
delScore =
scoreMat[delIndex] +
scoreFn.Deletion(refSeq, (DNALength)tSeqPos, querySeq, (DNALength)qSeqPos);
}
if (hasMatchIndex) {
matchScore = scoreMat[matchIndex] + scoreFn.Match(refSeq, (DNALength) tSeqPos, querySeq, (DNALength) qSeqPos);
matchScore =
scoreMat[matchIndex] +
scoreFn.Match(refSeq, (DNALength)tSeqPos, querySeq, (DNALength)qSeqPos);
}
/* cout << "ins score: " << insScore << "[" << scoreMat[insIndex] << "] del score " << delScore
/* std::cout << "ins score: " << insScore << "[" << scoreMat[insIndex] << "] del score " << delScore
<< " [" << scoreMat[delIndex] << "] match score " << matchScore
<< " [" << scoreMat[matchIndex] << "] qchar " << (int) querySeq.seq[qSeqPos] << " tchar " << (int) refSeq.seq[tSeqPos] << endl;*/
<< " [" << scoreMat[matchIndex] << "] qchar " << (int) querySeq.seq[qSeqPos] << " tchar " << (int) refSeq.seq[tSeqPos] << std::endl;*/
int minScore = std::min(matchScore, delScore);
minScore = std::min(minScore, insScore);
scoreMat[curIndex] = minScore;
// cout << "extend: " << qSeqPos << " " << tSeqPos << " " << minScore << endl;
// std::cout << "extend: " << qSeqPos << " " << tSeqPos << " " << minScore << std::endl;
if (minScore != INF_INT) {
if (minScore == insScore) { pathMat[curIndex] = Up; }
if (minScore == delScore) { pathMat[curIndex] = Left; }
if (minScore == matchScore) { pathMat[curIndex] = Diagonal; }
if (minScore == insScore) {
pathMat[curIndex] = Up;
}
else {
if (minScore == delScore) {
pathMat[curIndex] = Left;
}
if (minScore == matchScore) {
pathMat[curIndex] = Diagonal;
}
} else {
pathMat[curIndex] = NoArrow;
}
......@@ -295,7 +297,6 @@ template<typename T_Alignment,
globalMinScoreQPos = q;
globalMinScoreTPos = t;
}
}
if (curRowMinScore > prevRowMinScore) {
......@@ -328,11 +329,9 @@ template<typename T_Alignment,
if (arrow == Diagonal) {
q--;
t--;
}
else if (arrow == Left) {
} else if (arrow == Left) {
t--;
}
else if (arrow == Up) {
} else if (arrow == Up) {
q--;
}
}
......@@ -347,13 +346,9 @@ template<typename T_Alignment,
}
template <typename T_Alignment, typename T_ScoreFn, typename T_QuerySeq, typename T_RefSeq>
int ExtendAlignmentForward(T_QuerySeq &querySeq, int queryPos,
T_RefSeq &refSeq, int refPos,
int k,
std::vector<int> &scoreMat,
std::vector<Arrow> &pathMat,
T_Alignment &alignment,
T_ScoreFn &scoreFn,
int ExtendAlignmentForward(T_QuerySeq &querySeq, int queryPos, T_RefSeq &refSeq, int refPos, int k,
std::vector<int> &scoreMat, std::vector<Arrow> &pathMat,
T_Alignment &alignment, T_ScoreFn &scoreFn,
int minExtendNBases = 1, // Require that there
// are more than one
// base to align.
......@@ -366,7 +361,8 @@ int ExtendAlignmentForward(T_QuerySeq &querySeq, int queryPos,
// that one may have before
// terminating the alignment
//
) {
)
{
ForwardIndex forwardIndex;
forwardIndex.queryPos = queryPos;
......@@ -377,10 +373,7 @@ int ExtendAlignmentForward(T_QuerySeq &querySeq, int queryPos,
forwardIndex.queryAlignLength = querySeq.length - queryPos;
forwardIndex.refAlignLength = refSeq.length - refPos;
int alignScore;
alignScore= ExtendAlignment(querySeq, queryPos,
refSeq, refPos,
k,
scoreMat, pathMat,
alignScore = ExtendAlignment(querySeq, queryPos, refSeq, refPos, k, scoreMat, pathMat,
alignment, scoreFn, forwardIndex, minExtendNBases, maxNDrops);
alignment.qPos = queryPos;
alignment.tPos = refPos;
......@@ -388,13 +381,9 @@ int ExtendAlignmentForward(T_QuerySeq &querySeq, int queryPos,
}
template <typename T_Alignment, typename T_ScoreFn, typename T_QuerySeq, typename T_RefSeq>
int ExtendAlignmentReverse(T_QuerySeq &querySeq, int queryPos,
T_RefSeq &refSeq, int refPos,
int k,
std::vector<int> &scoreMat,
std::vector<Arrow> &pathMat,
T_Alignment &alignment,
T_ScoreFn &scoreFn,
int ExtendAlignmentReverse(T_QuerySeq &querySeq, int queryPos, T_RefSeq &refSeq, int refPos, int k,
std::vector<int> &scoreMat, std::vector<Arrow> &pathMat,
T_Alignment &alignment, T_ScoreFn &scoreFn,
int minExtendNBases = 1, // Require that there
// are more than one
// base to align.
......@@ -407,7 +396,8 @@ int ExtendAlignmentReverse(T_QuerySeq &querySeq, int queryPos,
// that one may have before
// terminating the alignment
//
) {
)
{
ReverseIndex reverseIndex;
reverseIndex.queryPos = queryPos - 1;
......@@ -415,14 +405,10 @@ int ExtendAlignmentReverse(T_QuerySeq &querySeq, int queryPos,
reverseIndex.queryAlignLength = queryPos;
reverseIndex.refAlignLength = refPos;
int alignScore;
alignScore = ExtendAlignment(querySeq, queryPos,
refSeq, refPos,
k,
scoreMat, pathMat,
alignScore = ExtendAlignment(querySeq, queryPos, refSeq, refPos, k, scoreMat, pathMat,
alignment, scoreFn, reverseIndex, minExtendNBases, maxNDrops);
return alignScore;
}
#endif // _BLASR_EXTEND_ALIGN_HPP_