Skip to content
GitLab
Explore
Sign in
Register
Commits on Source (2)
New upstream version 1.4.6
· 2b5f2030
Andreas Tille
authored
Sep 17, 2019
2b5f2030
New upstream version 1.4.7
· 63959b12
Andreas Tille
authored
Sep 18, 2019
63959b12
Show whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
63959b12
cmake_minimum_required
(
VERSION 3.2
)
project
(
racon
)
set
(
racon_version 1.4.7
)
set
(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${
PROJECT_BINARY_DIR
}
/lib
)
set
(
CMAKE_LIBRARY_OUTPUT_DIRECTORY
${
PROJECT_BINARY_DIR
}
/lib
)
...
...
@@ -42,6 +43,9 @@ else()
add_executable
(
racon
${
racon_sources
}
)
endif
()
# Add version information to bibary.
target_compile_definitions
(
racon PRIVATE RACON_VERSION=
"v
${
racon_version
}
"
)
if
(
NOT TARGET bioparser
)
add_subdirectory
(
vendor/bioparser EXCLUDE_FROM_ALL
)
endif
()
...
...
@@ -139,3 +143,9 @@ if (racon_build_wrapper)
add_subdirectory
(
vendor/rampler
)
endif
()
endif
()
# Add Debian packaging
SET
(
CPACK_GENERATOR
"DEB"
)
SET
(
CPACK_DEBIAN_PACKAGE_MAINTAINER
"Robert Vaser"
)
set
(
CPACK_PACKAGE_VERSION
"
${
racon_version
}
"
)
include
(
CPack
)
README.md
View file @
63959b12
# Racon
[

](https://github.com/
isovic
/racon/releases/latest)
[

](https://travis-ci.org/
isovic
/racon)
[

](https://github.com/
lbcb-sci
/racon/releases/latest)
[

](https://travis-ci.org/
lbcb-sci
/racon)
[

](https://doi.org/10.1101/gr.214270.116)
Consensus module for raw de novo DNA assembly of long uncorrected reads.
...
...
@@ -24,13 +24,13 @@ A **wrapper script** is also available to enable easier usage to the end-user fo
### CUDA Support
1.
gcc 5.0+
2.
cmake 3.10+
4.
CUDA
10
.0+
4.
CUDA
9
.0+
## Installation
To install Racon run the following commands:
```
bash
git clone
--recursive
https://github.com/
isovic
/racon.git racon
git clone
--recursive
https://github.com/
lbcb-sci
/racon.git racon
cd
racon
mkdir
build
cd
build
...
...
@@ -62,6 +62,13 @@ make
***Note***
: Short read polishing with CUDA is still in development!
### Packaging
To generate a Debian package for
`racon`
, run the following command from the build folder -
```
bash
make package
```
## Usage
Usage of
`racon`
is as following:
...
...
@@ -92,14 +99,16 @@ Usage of `racon` is as following:
-e, --error-threshold <float>
default: 0.3
maximum allowed error rate used for filtering overlaps
--no-trimming
disables consensus trimming at window ends
-m, --match <int>
default:
5
default:
3
score for matching bases
-x, --mismatch <int>
default: -
4
default: -
5
score for mismatching bases
-g, --gap <int>
default: -
8
default: -
4
gap penalty (must be negative)
-t, --threads <int>
default: 1
...
...
@@ -115,7 +124,7 @@ Usage of `racon` is as following:
number of batches for CUDA accelerated polishing
-b, --cuda-banded-alignment
use banding approximation for polishing on GPU. Only applicable when -c is used.
--cudaaligner-batches
(experimental)
--cudaaligner-batches
Number of batches for CUDA accelerated alignment
`racon_test`
is run without any parameters.
...
...
src/cuda/cudaaligner.cpp
View file @
63959b12
...
...
@@ -4,7 +4,7 @@
* @brief CUDABatchAligner class source file
*/
#include
<c
uda
utils/cudautils.hpp>
#include
<c
laragenomics/
utils/cudautils.hpp>
#include
"cudaaligner.hpp"
...
...
@@ -27,19 +27,21 @@ CUDABatchAligner::CUDABatchAligner(uint32_t max_query_size,
uint32_t
max_target_size
,
uint32_t
max_alignments
,
uint32_t
device_id
)
:
aligner_
(
claragenomics
::
cudaaligner
::
create_aligner
(
max_query_size
,
max_target_size
,
max_alignments
,
claragenomics
::
cudaaligner
::
AlignmentType
::
global
,
device_id
))
,
overlaps_
()
:
overlaps_
()
,
stream_
(
0
)
{
bid_
=
CUDABatchAligner
::
batches
++
;
CGA_CU_CHECK_ERR
(
cudaSetDevice
(
device_id
));
CGA_CU_CHECK_ERR
(
cudaStreamCreate
(
&
stream_
));
aligner_
->
set_cuda_stream
(
stream_
);
aligner_
=
claragenomics
::
cudaaligner
::
create_aligner
(
max_query_size
,
max_target_size
,
max_alignments
,
claragenomics
::
cudaaligner
::
AlignmentType
::
global
,
stream_
,
device_id
);
}
CUDABatchAligner
::~
CUDABatchAligner
()
...
...
@@ -51,11 +53,14 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
{
const
char
*
q
=
!
overlap
->
strand_
?
&
(
sequences
[
overlap
->
q_id_
]
->
data
()[
overlap
->
q_begin_
])
:
&
(
sequences
[
overlap
->
q_id_
]
->
reverse_complement
()[
overlap
->
q_length_
-
overlap
->
q_end_
]);
int32_t
q_len
=
overlap
->
q_end_
-
overlap
->
q_begin_
;
const
char
*
t
=
&
(
sequences
[
overlap
->
t_id_
]
->
data
()[
overlap
->
t_begin_
]);
int32_t
t_len
=
overlap
->
t_end_
-
overlap
->
t_begin_
;
claragenomics
::
cudaaligner
::
StatusType
s
=
aligner_
->
add_alignment
(
q
,
overlap
->
q_end_
-
overlap
->
q_begin_
,
t
,
overlap
->
t_end_
-
overlap
->
t_begin_
);
// NOTE: The cudaaligner API for adding alignments is the opposite of edlib. Hence, what is
// treated as target in edlib is query in cudaaligner and vice versa.
claragenomics
::
cudaaligner
::
StatusType
s
=
aligner_
->
add_alignment
(
t
,
t_len
,
q
,
q_len
);
if
(
s
==
claragenomics
::
cudaaligner
::
StatusType
::
exceeded_max_alignments
)
{
return
false
;
...
...
@@ -63,8 +68,8 @@ bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<
else
if
(
s
==
claragenomics
::
cudaaligner
::
StatusType
::
exceeded_max_alignment_difference
||
s
==
claragenomics
::
cudaaligner
::
StatusType
::
exceeded_max_length
)
{
cpu_overlap_data_
.
emplace_back
(
std
::
make_pair
<
std
::
string
,
std
::
string
>
(
std
::
string
(
q
,
q
+
overlap
->
q_end_
-
overlap
->
q_begin_
),
std
::
string
(
t
,
t
+
overlap
->
t_end_
-
overlap
->
t_begin_
)));
cpu_overlap_data_
.
emplace_back
(
std
::
make_pair
<
std
::
string
,
std
::
string
>
(
std
::
string
(
q
,
q
+
q_len
),
std
::
string
(
t
,
t
+
t_len
)));
cpu_overlaps_
.
push_back
(
overlap
);
}
else
if
(
s
!=
claragenomics
::
cudaaligner
::
StatusType
::
success
)
...
...
src/cuda/cudaaligner.hpp
View file @
63959b12
...
...
@@ -3,9 +3,9 @@
*
* @brief CUDA aligner class header file
*/
#include
"
cudaaligner/cudaaligner.hpp
"
#include
"
cudaaligner/aligner.hpp
"
#include
"
cudaaligner/alignment.hpp
"
#include
<claragenomics/
cudaaligner/cudaaligner.hpp
>
#include
<claragenomics/
cudaaligner/aligner.hpp
>
#include
<claragenomics/
cudaaligner/alignment.hpp
>
#include
"overlap.hpp"
#include
"sequence.hpp"
...
...
src/cuda/cudabatch.cpp
View file @
63959b12
...
...
@@ -13,28 +13,53 @@
#include
"cudautils.hpp"
#include
"spoa/spoa.hpp"
#include
<c
uda
utils/cudautils.hpp>
#include
<c
laragenomics/
utils/cudautils.hpp>
namespace
racon
{
std
::
atomic
<
uint32_t
>
CUDABatchProcessor
::
batches
;
std
::
unique_ptr
<
CUDABatchProcessor
>
createCUDABatch
(
uint32_t
max_windows
,
uint32_t
max_window_depth
,
uint32_t
device
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
)
{
return
std
::
unique_ptr
<
CUDABatchProcessor
>
(
new
CUDABatchProcessor
(
max_windows
,
max_window_depth
,
device
,
gap
,
mismatch
,
match
,
cuda_banded_alignment
));
std
::
unique_ptr
<
CUDABatchProcessor
>
createCUDABatch
(
uint32_t
max_window_depth
,
uint32_t
device
,
size_t
avail_mem
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
)
{
return
std
::
unique_ptr
<
CUDABatchProcessor
>
(
new
CUDABatchProcessor
(
max_window_depth
,
device
,
avail_mem
,
gap
,
mismatch
,
match
,
cuda_banded_alignment
));
}
CUDABatchProcessor
::
CUDABatchProcessor
(
uint32_t
max_windows
,
uint32_t
max_window_depth
,
uint32_t
device
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
)
:
max_windows_
(
max_windows
)
,
cudapoa_batch_
(
claragenomics
::
cudapoa
::
create_batch
(
max_windows
,
max_window_depth
,
device
,
claragenomics
::
cudapoa
::
OutputType
::
consensus
,
gap
,
mismatch
,
match
,
cuda_banded_alignment
))
,
windows_
()
CUDABatchProcessor
::
CUDABatchProcessor
(
uint32_t
max_window_depth
,
uint32_t
device
,
size_t
avail_mem
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
)
:
windows_
()
,
seqs_added_per_window_
()
{
bid_
=
CUDABatchProcessor
::
batches
++
;
// Create new CUDA stream.
CGA_CU_CHECK_ERR
(
cudaStreamCreate
(
&
stream_
));
cudapoa_batch_
->
set_cuda_stream
(
stream_
);
cudapoa_batch_
=
claragenomics
::
cudapoa
::
create_batch
(
max_window_depth
,
device
,
stream_
,
avail_mem
,
claragenomics
::
cudapoa
::
OutputType
::
consensus
,
gap
,
mismatch
,
match
,
cuda_banded_alignment
);
}
CUDABatchProcessor
::~
CUDABatchProcessor
()
...
...
@@ -45,79 +70,21 @@ CUDABatchProcessor::~CUDABatchProcessor()
bool
CUDABatchProcessor
::
addWindow
(
std
::
shared_ptr
<
Window
>
window
)
{
if
(
windows_
.
size
()
<
max_windows_
)
{
windows_
.
push_back
(
window
);
seqs_added_per_window_
.
push_back
(
0
);
return
true
;
}
else
{
return
false
;
}
}
bool
CUDABatchProcessor
::
hasWindows
()
const
{
return
(
windows_
.
size
()
!=
0
);
}
void
CUDABatchProcessor
::
convertPhredQualityToWeights
(
const
char
*
qual
,
uint32_t
qual_length
,
std
::
vector
<
int8_t
>&
weights
)
{
weights
.
clear
();
for
(
uint32_t
i
=
0
;
i
<
qual_length
;
i
++
)
{
weights
.
push_back
(
static_cast
<
uint8_t
>
(
qual
[
i
])
-
33
);
// PHRED quality
}
}
claragenomics
::
cudapoa
::
StatusType
CUDABatchProcessor
::
addSequenceToPoa
(
std
::
pair
<
const
char
*
,
uint32_t
>&
seq
,
std
::
pair
<
const
char
*
,
uint32_t
>&
qualities
)
{
// Add sequences to latest poa in batch.
std
::
vector
<
int8_t
>
weights
;
claragenomics
::
cudapoa
::
StatusType
status
=
claragenomics
::
cudapoa
::
StatusType
::
success
;
if
(
qualities
.
first
==
nullptr
)
{
status
=
cudapoa_batch_
->
add_seq_to_poa
(
seq
.
first
,
nullptr
,
seq
.
second
);
}
else
{
convertPhredQualityToWeights
(
qualities
.
first
,
qualities
.
second
,
weights
);
status
=
cudapoa_batch_
->
add_seq_to_poa
(
seq
.
first
,
weights
.
data
(),
seq
.
second
);
}
return
status
;
}
void
CUDABatchProcessor
::
generateMemoryMap
()
{
auto
num_windows
=
windows_
.
size
();
for
(
uint32_t
w
=
0
;
w
<
num_windows
;
w
++
)
{
// Add new poa
claragenomics
::
cudapoa
::
StatusType
status
=
cudapoa_batch_
->
add_poa
();
if
(
status
!=
claragenomics
::
cudapoa
::
StatusType
::
success
)
{
fprintf
(
stderr
,
"Failed to add new POA to batch %d.
\n
"
,
cudapoa_batch_
->
batch_id
());
exit
(
1
);
}
std
::
shared_ptr
<
Window
>
window
=
windows_
.
at
(
w
);
claragenomics
::
cudapoa
::
Group
poa_group
;
uint32_t
num_seqs
=
window
->
sequences_
.
size
();
std
::
vector
<
uint8_t
>
weights
;
std
::
vector
<
std
::
vector
<
int8_t
>>
all_read_weights
(
num_seqs
,
std
::
vector
<
int8_t
>
())
;
// Add first sequence as backbone to graph.
std
::
pair
<
const
char
*
,
uint32_t
>
seq
=
window
->
sequences_
.
front
();
std
::
pair
<
const
char
*
,
uint32_t
>
qualities
=
window
->
qualities_
.
front
();
status
=
addSequenceToPoa
(
seq
,
qualities
);
if
(
status
!=
claragenomics
::
cudapoa
::
StatusType
::
success
)
{
fprintf
(
stderr
,
"Could not add backbone to window. Fatal error.
\n
"
);
exit
(
1
);
}
std
::
vector
<
int8_t
>
backbone_weights
;
convertPhredQualityToWeights
(
qualities
.
first
,
qualities
.
second
,
all_read_weights
[
0
]);
claragenomics
::
cudapoa
::
Entry
e
=
{
seq
.
first
,
all_read_weights
[
0
].
data
(),
static_cast
<
int32_t
>
(
seq
.
second
)
};
poa_group
.
push_back
(
e
);
// Add the rest of the sequences in sorted order of starting positions.
std
::
vector
<
uint32_t
>
rank
;
...
...
@@ -138,27 +105,56 @@ void CUDABatchProcessor::generateMemoryMap()
uint32_t
i
=
rank
.
at
(
j
);
seq
=
window
->
sequences_
.
at
(
i
);
qualities
=
window
->
qualities_
.
at
(
i
);
// Add sequences to latest poa in batch.
status
=
addSequenceToPoa
(
seq
,
qualities
);
if
(
status
==
claragenomics
::
cudapoa
::
StatusType
::
exceeded_maximum_sequence_size
)
convertPhredQualityToWeights
(
qualities
.
first
,
qualities
.
second
,
all_read_weights
[
i
]);
claragenomics
::
cudapoa
::
Entry
p
=
{
seq
.
first
,
all_read_weights
[
i
].
data
(),
static_cast
<
int32_t
>
(
seq
.
second
)
};
poa_group
.
push_back
(
p
);
}
// Add group to CUDAPOA batch object.
std
::
vector
<
claragenomics
::
cudapoa
::
StatusType
>
entry_status
;
claragenomics
::
cudapoa
::
StatusType
status
=
cudapoa_batch_
->
add_poa_group
(
entry_status
,
poa_group
);
// If group was added, then push window in accepted windows list.
if
(
status
!=
claragenomics
::
cudapoa
::
StatusType
::
success
)
{
return
false
;
}
else
{
windows_
.
push_back
(
window
);
}
// Keep track of how many sequences were actually processed for this
// group. This acts as the effective coverage for that window.
int32_t
seq_added
=
0
;
for
(
uint32_t
i
=
1
;
i
<
entry_status
.
size
();
i
++
)
{
if
(
entry_status
[
i
]
==
claragenomics
::
cudapoa
::
StatusType
::
exceeded_maximum_sequence_size
)
{
long_seq
++
;
continue
;
}
else
if
(
status
==
claragenomics
::
cudapoa
::
StatusType
::
exceeded_maximum_sequences_per_poa
)
else
if
(
entry_
status
[
i
]
==
claragenomics
::
cudapoa
::
StatusType
::
exceeded_maximum_sequences_per_poa
)
{
skipped_seq
++
;
continue
;
}
else
if
(
status
!=
claragenomics
::
cudapoa
::
StatusType
::
success
)
else
if
(
entry_
status
[
i
]
!=
claragenomics
::
cudapoa
::
StatusType
::
success
)
{
fprintf
(
stderr
,
"Could not add sequence to POA in batch %d.
\n
"
,
cudapoa_batch_
->
batch_id
());
exit
(
1
);
}
seqs_added_per_window_
[
w
]
=
seqs_added_per_window_
[
w
]
+
1
;
seq_added
++
;
}
seqs_added_per_window_
.
push_back
(
seq_added
);
#ifndef NDEBUG
if
(
long_seq
>
0
)
{
...
...
@@ -169,6 +165,23 @@ void CUDABatchProcessor::generateMemoryMap()
fprintf
(
stderr
,
"Skipped (%d / %d)
\n
"
,
skipped_seq
,
num_seqs
);
}
#endif
return
true
;
}
bool
CUDABatchProcessor
::
hasWindows
()
const
{
return
(
cudapoa_batch_
->
get_total_poas
()
>
0
);
}
void
CUDABatchProcessor
::
convertPhredQualityToWeights
(
const
char
*
qual
,
uint32_t
qual_length
,
std
::
vector
<
int8_t
>&
weights
)
{
weights
.
clear
();
for
(
uint32_t
i
=
0
;
i
<
qual_length
;
i
++
)
{
weights
.
push_back
(
static_cast
<
uint8_t
>
(
qual
[
i
])
-
33
);
// PHRED quality
}
}
...
...
@@ -197,6 +210,7 @@ void CUDABatchProcessor::getConsensus()
{
// This is a special case borrowed from the CPU version.
// TODO: We still run this case through the GPU, but could take it out.
bool
consensus_status
=
false
;
if
(
window
->
sequences_
.
size
()
<
3
)
{
window
->
consensus_
=
std
::
string
(
window
->
sequences_
.
front
().
first
,
...
...
@@ -204,11 +218,11 @@ void CUDABatchProcessor::getConsensus()
// This status is borrowed from the CPU version which considers this
// a failed consensus. All other cases are true.
window_
consensus_status
_
.
emplace_back
(
false
)
;
consensus_status
=
false
;
}
else
{
window
->
consensus_
=
consensuses
.
at
(
i
)
;
window
->
consensus_
=
consensuses
[
i
]
;
if
(
window
->
type_
==
WindowType
::
kTGS
)
{
uint32_t
num_seqs_in_window
=
seqs_added_per_window_
[
i
];
...
...
@@ -216,12 +230,12 @@ void CUDABatchProcessor::getConsensus()
int32_t
begin
=
0
,
end
=
window
->
consensus_
.
size
()
-
1
;
for
(;
begin
<
static_cast
<
int32_t
>
(
window
->
consensus_
.
size
());
++
begin
)
{
if
(
coverages
.
at
(
i
).
at
(
begin
)
>=
average_coverage
)
{
if
(
coverages
[
i
][
begin
]
>=
average_coverage
)
{
break
;
}
}
for
(;
end
>=
0
;
--
end
)
{
if
(
coverages
.
at
(
i
).
at
(
end
)
>=
average_coverage
)
{
if
(
coverages
[
i
][
end
]
>=
average_coverage
)
{
break
;
}
}
...
...
@@ -229,12 +243,14 @@ void CUDABatchProcessor::getConsensus()
if
(
begin
>=
end
)
{
fprintf
(
stderr
,
"[CUDABatchProcessor] warning: "
"contig might be chimeric in window %lu!
\n
"
,
window
->
id_
);
consensus_status
=
false
;
}
else
{
window
->
consensus_
=
window
->
consensus_
.
substr
(
begin
,
end
-
begin
+
1
);
consensus_status
=
true
;
}
}
window_consensus_status_
.
emplace_back
(
true
);
}
window_consensus_status_
.
emplace_back
(
consensus_status
);
}
}
}
...
...
@@ -242,7 +258,6 @@ void CUDABatchProcessor::getConsensus()
const
std
::
vector
<
bool
>&
CUDABatchProcessor
::
generateConsensus
()
{
// Generate consensus for all windows in the batch
generateMemoryMap
();
generatePOA
();
getConsensus
();
...
...
src/cuda/cudabatch.hpp
View file @
63959b12
...
...
@@ -11,7 +11,7 @@
#include
<atomic>
#include
"window.hpp"
#include
"
cudapoa/batch.hpp
"
#include
<claragenomics/
cudapoa/batch.hpp
>
namespace
spoa
{
class
AlignmentEngine
;
...
...
@@ -22,7 +22,7 @@ namespace racon {
class
Window
;
class
CUDABatchProcessor
;
std
::
unique_ptr
<
CUDABatchProcessor
>
createCUDABatch
(
uint32_t
max_windows
,
uint32_t
max_window_depth
,
uint32_t
device
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
);
std
::
unique_ptr
<
CUDABatchProcessor
>
createCUDABatch
(
uint32_t
max_window_depth
,
uint32_t
device
,
size_t
avail_mem
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
);
class
CUDABatchProcessor
{
...
...
@@ -65,27 +65,19 @@ public:
// Builder function to create a new CUDABatchProcessor object.
friend
std
::
unique_ptr
<
CUDABatchProcessor
>
createCUDABatch
(
uint32_t
max_windows
,
uint32_t
max_window_depth
,
uint32_t
device
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
);
createCUDABatch
(
uint32_t
max_window_depth
,
uint32_t
device
,
size_t
avail_mem
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
);
protected
:
/**
* @brief Constructor for CUDABatch class.
*
* @param[in] max_windows : Maximum number windows in batch
* @param[in] max_window_depth : Maximum number of sequences per window
* @param[in] cuda_banded_alignment : Use banded POA alignment
*/
CUDABatchProcessor
(
uint32_t
max_windows
,
uint32_t
max_window_depth
,
uint32_t
device
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
);
CUDABatchProcessor
(
uint32_t
max_window_depth
,
uint32_t
device
,
size_t
avail_mem
,
int8_t
gap
,
int8_t
mismatch
,
int8_t
match
,
bool
cuda_banded_alignment
);
CUDABatchProcessor
(
const
CUDABatchProcessor
&
)
=
delete
;
const
CUDABatchProcessor
&
operator
=
(
const
CUDABatchProcessor
&
)
=
delete
;
/*
* @brief Process all the windows and re-map them into
* memory for more efficient processing in the CUDA
* kernels.
*/
void
generateMemoryMap
();
/*
* @brief Run the CUDA kernel for generating POA on the batch.
* This call is asynchronous.
...
...
@@ -106,13 +98,6 @@ protected:
uint32_t
qual_length
,
std
::
vector
<
int8_t
>&
weights
);
/*
* @brief Add sequence and qualities to cudapoa.
*
*/
claragenomics
::
cudapoa
::
StatusType
addSequenceToPoa
(
std
::
pair
<
const
char
*
,
uint32_t
>&
seq
,
std
::
pair
<
const
char
*
,
uint32_t
>&
quality
);
protected
:
// Static batch count used to generate batch IDs.
static
std
::
atomic
<
uint32_t
>
batches
;
...
...
@@ -120,9 +105,6 @@ protected:
// Batch ID.
uint32_t
bid_
=
0
;
// Maximum windows allowed in batch.
uint32_t
max_windows_
;
// CUDA-POA library object that manages POA batch.
std
::
unique_ptr
<
claragenomics
::
cudapoa
::
Batch
>
cudapoa_batch_
;
...
...
src/cuda/cudapolisher.cpp
View file @
63959b12
...
...
@@ -11,7 +11,7 @@
#include
"sequence.hpp"
#include
"cudapolisher.hpp"
#include
<c
uda
utils/cudautils.hpp>
#include
<c
laragenomics/
utils/cudautils.hpp>
#include
"bioparser/bioparser.hpp"
#include
"logger/logger.hpp"
...
...
@@ -27,12 +27,12 @@ CUDAPolisher::CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
std
::
unique_ptr
<
bioparser
::
Parser
<
Overlap
>>
oparser
,
std
::
unique_ptr
<
bioparser
::
Parser
<
Sequence
>>
tparser
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
,
uint32_t
cudapoa_batches
,
bool
cuda_banded_alignment
,
uint32_t
cudaaligner_batches
)
:
Polisher
(
std
::
move
(
sparser
),
std
::
move
(
oparser
),
std
::
move
(
tparser
),
type
,
window_length
,
quality_threshold
,
error_threshold
,
match
,
mismatch
,
gap
,
num_threads
)
type
,
window_length
,
quality_threshold
,
error_threshold
,
trim
,
match
,
mismatch
,
gap
,
num_threads
)
,
cudapoa_batches_
(
cudapoa_batches
)
,
cudaaligner_batches_
(
cudaaligner_batches
)
,
gap_
(
gap
)
...
...
@@ -82,6 +82,7 @@ std::vector<uint32_t> CUDAPolisher::calculate_batches_per_gpu(uint32_t batches,
return
batches_per_gpu
;
}
void
CUDAPolisher
::
find_overlap_breaking_points
(
std
::
vector
<
std
::
unique_ptr
<
Overlap
>>&
overlaps
)
{
if
(
cudaaligner_batches_
<
1
)
...
...
@@ -92,7 +93,7 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
else
{
// TODO: Experimentally this is giving decent perf
const
uint32_t
MAX_ALIGNMENTS
=
5
0
;
const
uint32_t
MAX_ALIGNMENTS
=
20
0
;
logger_
->
log
();
std
::
mutex
mutex_overlaps
;
...
...
@@ -149,7 +150,6 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
else
if
(
logger_step
!=
0
&&
log_bar_idx
<
static_cast
<
int32_t
>
(
RACON_LOGGER_BIN_SIZE
))
{
logger_
->
bar
(
"[racon::CUDAPolisher::initialize] aligning overlaps"
);
std
::
cerr
<<
std
::
endl
;
log_bar_idx_prev
=
log_bar_idx
;
}
}
...
...
@@ -168,10 +168,12 @@ void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Over
{
for
(
uint32_t
batch
=
0
;
batch
<
batches_per_gpu
.
at
(
device
);
batch
++
)
{
batch_aligners_
.
emplace_back
(
createCUDABatchAligner
(
1
0
000
,
1
0
000
,
MAX_ALIGNMENTS
,
device
));
batch_aligners_
.
emplace_back
(
createCUDABatchAligner
(
1
5
000
,
1
5
000
,
MAX_ALIGNMENTS
,
device
));
}
}
logger_
->
log
(
"[racon::CUDAPolisher::initialize] allocated memory on GPUs for alignment"
);
// Run batched alignment.
std
::
vector
<
std
::
future
<
void
>>
thread_futures
;
for
(
auto
&
aligner
:
batch_aligners_
)
...
...
@@ -203,7 +205,6 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
else
{
// Creation and use of batches.
const
uint32_t
MAX_WINDOWS
=
256
;
const
uint32_t
MAX_DEPTH_PER_WINDOW
=
200
;
// Bin batches into each GPU.
...
...
@@ -211,13 +212,19 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
for
(
int32_t
device
=
0
;
device
<
num_devices_
;
device
++
)
{
size_t
total
=
0
,
free
=
0
;
CGA_CU_CHECK_ERR
(
cudaSetDevice
(
device
));
CGA_CU_CHECK_ERR
(
cudaMemGetInfo
(
&
free
,
&
total
));
// Using 90% of available memory as heuristic since not all available memory can be used
// due to fragmentation.
size_t
mem_per_batch
=
0.9
*
free
/
batches_per_gpu
.
at
(
device
);
for
(
uint32_t
batch
=
0
;
batch
<
batches_per_gpu
.
at
(
device
);
batch
++
)
{
batch_processors_
.
emplace_back
(
createCUDABatch
(
MAX_WINDOWS
,
MAX_DEPTH_PER_WINDOW
,
device
,
gap_
,
mismatch_
,
match_
,
cuda_banded_alignment_
));
batch_processors_
.
emplace_back
(
createCUDABatch
(
MAX_DEPTH_PER_WINDOW
,
device
,
mem_per_batch
,
gap_
,
mismatch_
,
match_
,
cuda_banded_alignment_
));
}
}
logger_
->
log
(
"[racon::CUDAPolisher::polish] allocated memory on GPUs"
);
logger_
->
log
(
"[racon::CUDAPolisher::polish] allocated memory on GPUs
for polishing
"
);
// Mutex for accessing the vector of windows.
std
::
mutex
mutex_windows
;
...
...
@@ -294,10 +301,12 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
continue
;
}
else
if
(
logger_step
!=
0
&&
log_bar_idx
<
static_cast
<
int32_t
>
(
RACON_LOGGER_BIN_SIZE
))
{
while
(
log_bar_idx_prev
<=
log_bar_idx
)
{
logger_
->
bar
(
"[racon::CUDAPolisher::polish] generating consensus"
);
std
::
cerr
<<
std
::
endl
;
log_bar_idx_prev
=
log_bar_idx
;
log_bar_idx_prev
++
;
}
}
}
}
...
...
@@ -343,7 +352,7 @@ void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
exit
(
1
);
}
return
window_consensus_status_
.
at
(
j
)
=
windows_
[
j
]
->
generate_consensus
(
alignment_engines_
[
it
->
second
]);
alignment_engines_
[
it
->
second
]
,
trim_
);
},
i
));
}
}
...
...
src/cuda/cudapolisher.hpp
View file @
63959b12
...
...
@@ -26,7 +26,7 @@ public:
friend
std
::
unique_ptr
<
Polisher
>
createPolisher
(
const
std
::
string
&
sequences_path
,
const
std
::
string
&
overlaps_path
,
const
std
::
string
&
target_path
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
,
uint32_t
cudapoa_batches
,
bool
cuda_banded_alignment
,
uint32_t
cudaaligner_batches
);
...
...
@@ -35,7 +35,7 @@ protected:
std
::
unique_ptr
<
bioparser
::
Parser
<
Overlap
>>
oparser
,
std
::
unique_ptr
<
bioparser
::
Parser
<
Sequence
>>
tparser
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
,
uint32_t
cudapoa_batches
,
bool
cuda_banded_alignment
,
uint32_t
cudaaligner_batches
);
CUDAPolisher
(
const
CUDAPolisher
&
)
=
delete
;
...
...
src/main.cpp
View file @
63959b12
...
...
@@ -12,7 +12,11 @@
#include
"cuda/cudapolisher.hpp"
#endif
static
const
char
*
version
=
"v1.4.3"
;
#ifndef RACON_VERSION
#error "Undefined version for Racon. Please pass version using -DRACON_VERSION macro."
#endif
static
const
char
*
version
=
RACON_VERSION
;
static
const
int32_t
CUDAALIGNER_INPUT_CODE
=
10000
;
static
struct
option
options
[]
=
{
...
...
@@ -21,6 +25,7 @@ static struct option options[] = {
{
"window-length"
,
required_argument
,
0
,
'w'
},
{
"quality-threshold"
,
required_argument
,
0
,
'q'
},
{
"error-threshold"
,
required_argument
,
0
,
'e'
},
{
"no-trimming"
,
no_argument
,
0
,
'T'
},
{
"match"
,
required_argument
,
0
,
'm'
},
{
"mismatch"
,
required_argument
,
0
,
'x'
},
{
"gap"
,
required_argument
,
0
,
'g'
},
...
...
@@ -44,10 +49,11 @@ int main(int argc, char** argv) {
uint32_t
window_length
=
500
;
double
quality_threshold
=
10.0
;
double
error_threshold
=
0.3
;
bool
trim
=
true
;
int8_t
match
=
5
;
int8_t
mismatch
=
-
4
;
int8_t
gap
=
-
8
;
int8_t
match
=
3
;
int8_t
mismatch
=
-
5
;
int8_t
gap
=
-
4
;
uint32_t
type
=
0
;
bool
drop_unpolished_sequences
=
true
;
...
...
@@ -80,6 +86,9 @@ int main(int argc, char** argv) {
case
'e'
:
error_threshold
=
atof
(
optarg
);
break
;
case
'T'
:
trim
=
false
;
break
;
case
'm'
:
match
=
atoi
(
optarg
);
break
;
...
...
@@ -137,7 +146,7 @@ int main(int argc, char** argv) {
auto
polisher
=
racon
::
createPolisher
(
input_paths
[
0
],
input_paths
[
1
],
input_paths
[
2
],
type
==
0
?
racon
::
PolisherType
::
kC
:
racon
::
PolisherType
::
kF
,
window_length
,
quality_threshold
,
error_threshold
,
match
,
mismatch
,
gap
,
num_threads
,
error_threshold
,
trim
,
match
,
mismatch
,
gap
,
num_threads
,
cudapoa_batches
,
cuda_banded_alignment
,
cudaaligner_batches
);
polisher
->
initialize
();
...
...
@@ -181,14 +190,16 @@ void help() {
" -e, --error-threshold <float>
\n
"
" default: 0.3
\n
"
" maximum allowed error rate used for filtering overlaps
\n
"
" --no-trimming
\n
"
" disables consensus trimming at window ends
\n
"
" -m, --match <int>
\n
"
" default:
5
\n
"
" default:
3
\n
"
" score for matching bases
\n
"
" -x, --mismatch <int>
\n
"
" default: -
4
\n
"
" default: -
5
\n
"
" score for mismatching bases
\n
"
" -g, --gap <int>
\n
"
" default: -
8
\n
"
" default: -
4
\n
"
" gap penalty (must be negative)
\n
"
" -t, --threads <int>
\n
"
" default: 1
\n
"
...
...
@@ -203,7 +214,7 @@ void help() {
" number of batches for CUDA accelerated polishing
\n
"
" -b, --cuda-banded-alignment
\n
"
" use banding approximation for alignment on GPU
\n
"
" --cudaaligner-batches
(experimental)
\n
"
" --cudaaligner-batches
\n
"
" Number of batches for CUDA accelerated alignment
\n
"
#endif
...
...
src/polisher.cpp
View file @
63959b12
...
...
@@ -55,7 +55,7 @@ uint64_t shrinkToFit(std::vector<std::unique_ptr<T>>& src, uint64_t begin) {
std
::
unique_ptr
<
Polisher
>
createPolisher
(
const
std
::
string
&
sequences_path
,
const
std
::
string
&
overlaps_path
,
const
std
::
string
&
target_path
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
,
uint32_t
cudapoa_batches
,
bool
cuda_banded_alignment
,
uint32_t
cudaaligner_batches
)
{
...
...
@@ -80,18 +80,20 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
return
src
.
compare
(
src
.
size
()
-
suffix
.
size
(),
suffix
.
size
(),
suffix
)
==
0
;
};
if
(
is_suffix
(
sequences_path
,
".fasta"
)
||
is_suffix
(
sequences_path
,
".fa"
)
||
is_suffix
(
sequences_path
,
".fasta.gz"
)
||
is_suffix
(
sequences_path
,
".fa.gz"
))
{
if
(
is_suffix
(
sequences_path
,
".fasta"
)
||
is_suffix
(
sequences_path
,
".fasta.gz"
)
||
is_suffix
(
sequences_path
,
".fna"
)
||
is_suffix
(
sequences_path
,
".fna.gz"
)
||
is_suffix
(
sequences_path
,
".fa"
)
||
is_suffix
(
sequences_path
,
".fa.gz"
))
{
sparser
=
bioparser
::
createParser
<
bioparser
::
FastaParser
,
Sequence
>
(
sequences_path
);
}
else
if
(
is_suffix
(
sequences_path
,
".fastq"
)
||
is_suffix
(
sequences_path
,
".f
q
"
)
||
is_suffix
(
sequences_path
,
".f
astq.gz
"
)
||
is_suffix
(
sequences_path
,
".fq.gz"
))
{
}
else
if
(
is_suffix
(
sequences_path
,
".fastq"
)
||
is_suffix
(
sequences_path
,
".f
astq.gz
"
)
||
is_suffix
(
sequences_path
,
".f
q
"
)
||
is_suffix
(
sequences_path
,
".fq.gz"
))
{
sparser
=
bioparser
::
createParser
<
bioparser
::
FastqParser
,
Sequence
>
(
sequences_path
);
}
else
{
fprintf
(
stderr
,
"[racon::createPolisher] error: "
"file %s has unsupported format extension (valid extensions: "
".fasta, .fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!
\n
"
,
".fasta, .fasta.gz, .fna, .fna.gz, .fa, .fa.gz, .fastq, .fastq.gz, "
".fq, .fq.gz)!
\n
"
,
sequences_path
.
c_str
());
exit
(
1
);
}
...
...
@@ -112,18 +114,20 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
exit
(
1
);
}
if
(
is_suffix
(
target_path
,
".fasta"
)
||
is_suffix
(
target_path
,
".fa"
)
||
is_suffix
(
target_path
,
".fasta.gz"
)
||
is_suffix
(
target_path
,
".fa.gz"
))
{
if
(
is_suffix
(
target_path
,
".fasta"
)
||
is_suffix
(
target_path
,
".fasta.gz"
)
||
is_suffix
(
target_path
,
".fna"
)
||
is_suffix
(
target_path
,
".fna.gz"
)
||
is_suffix
(
target_path
,
".fa"
)
||
is_suffix
(
target_path
,
".fa.gz"
))
{
tparser
=
bioparser
::
createParser
<
bioparser
::
FastaParser
,
Sequence
>
(
target_path
);
}
else
if
(
is_suffix
(
target_path
,
".fastq"
)
||
is_suffix
(
target_path
,
".f
q
"
)
||
is_suffix
(
target_path
,
".f
astq.gz
"
)
||
is_suffix
(
target_path
,
".fq.gz"
))
{
}
else
if
(
is_suffix
(
target_path
,
".fastq"
)
||
is_suffix
(
target_path
,
".f
astq.gz
"
)
||
is_suffix
(
target_path
,
".f
q
"
)
||
is_suffix
(
target_path
,
".fq.gz"
))
{
tparser
=
bioparser
::
createParser
<
bioparser
::
FastqParser
,
Sequence
>
(
target_path
);
}
else
{
fprintf
(
stderr
,
"[racon::createPolisher] error: "
"file %s has unsupported format extension (valid extensions: "
".fasta, .fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!
\n
"
,
".fasta, .fasta.gz, .fna, .fna.gz, .fa, .fa.gz, .fastq, .fastq.gz, "
".fq, .fq.gz)!
\n
"
,
target_path
.
c_str
());
exit
(
1
);
}
...
...
@@ -134,7 +138,7 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
// If CUDA is enabled, return an instance of the CUDAPolisher object.
return
std
::
unique_ptr
<
Polisher
>
(
new
CUDAPolisher
(
std
::
move
(
sparser
),
std
::
move
(
oparser
),
std
::
move
(
tparser
),
type
,
window_length
,
quality_threshold
,
error_threshold
,
match
,
mismatch
,
gap
,
quality_threshold
,
error_threshold
,
trim
,
match
,
mismatch
,
gap
,
num_threads
,
cudapoa_batches
,
cuda_banded_alignment
,
cudaaligner_batches
));
#else
fprintf
(
stderr
,
"[racon::createPolisher] error: "
...
...
@@ -149,7 +153,7 @@ std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
(
void
)
cuda_banded_alignment
;
return
std
::
unique_ptr
<
Polisher
>
(
new
Polisher
(
std
::
move
(
sparser
),
std
::
move
(
oparser
),
std
::
move
(
tparser
),
type
,
window_length
,
quality_threshold
,
error_threshold
,
match
,
mismatch
,
gap
,
quality_threshold
,
error_threshold
,
trim
,
match
,
mismatch
,
gap
,
num_threads
));
}
}
...
...
@@ -158,11 +162,11 @@ Polisher::Polisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
std
::
unique_ptr
<
bioparser
::
Parser
<
Overlap
>>
oparser
,
std
::
unique_ptr
<
bioparser
::
Parser
<
Sequence
>>
tparser
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
)
:
sparser_
(
std
::
move
(
sparser
)),
oparser_
(
std
::
move
(
oparser
)),
tparser_
(
std
::
move
(
tparser
)),
type_
(
type
),
quality_threshold_
(
quality_threshold
),
error_threshold_
(
error_threshold
),
quality_threshold
),
error_threshold_
(
error_threshold
),
trim_
(
trim
),
alignment_engines_
(),
sequences_
(),
dummy_quality_
(
window_length
,
'!'
),
window_length_
(
window_length
),
windows_
(),
thread_pool_
(
thread_pool
::
createThreadPool
(
num_threads
)),
...
...
@@ -494,7 +498,7 @@ void Polisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
exit
(
1
);
}
return
windows_
[
j
]
->
generate_consensus
(
alignment_engines_
[
it
->
second
]);
alignment_engines_
[
it
->
second
]
,
trim_
);
},
i
));
}
...
...
src/polisher.hpp
View file @
63959b12
...
...
@@ -44,7 +44,7 @@ class Polisher;
std
::
unique_ptr
<
Polisher
>
createPolisher
(
const
std
::
string
&
sequences_path
,
const
std
::
string
&
overlaps_path
,
const
std
::
string
&
target_path
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
,
uint32_t
cuda_batches
=
0
,
bool
cuda_banded_alignment
=
false
,
uint32_t
cudaaligner_batches
=
0
);
...
...
@@ -60,7 +60,7 @@ public:
friend
std
::
unique_ptr
<
Polisher
>
createPolisher
(
const
std
::
string
&
sequences_path
,
const
std
::
string
&
overlaps_path
,
const
std
::
string
&
target_path
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
,
uint32_t
cuda_batches
,
bool
cuda_banded_alignment
,
uint32_t
cudaaligner_batches
);
...
...
@@ -69,7 +69,7 @@ protected:
std
::
unique_ptr
<
bioparser
::
Parser
<
Overlap
>>
oparser
,
std
::
unique_ptr
<
bioparser
::
Parser
<
Sequence
>>
tparser
,
PolisherType
type
,
uint32_t
window_length
,
double
quality_threshold
,
double
error_threshold
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
double
error_threshold
,
bool
trim
,
int8_t
match
,
int8_t
mismatch
,
int8_t
gap
,
uint32_t
num_threads
);
Polisher
(
const
Polisher
&
)
=
delete
;
const
Polisher
&
operator
=
(
const
Polisher
&
)
=
delete
;
...
...
@@ -82,6 +82,7 @@ protected:
PolisherType
type_
;
double
quality_threshold_
;
double
error_threshold_
;
bool
trim_
;
std
::
vector
<
std
::
shared_ptr
<
spoa
::
AlignmentEngine
>>
alignment_engines_
;
std
::
vector
<
std
::
unique_ptr
<
Sequence
>>
sequences_
;
...
...
src/window.cpp
View file @
63959b12
...
...
@@ -58,7 +58,8 @@ void Window::add_layer(const char* sequence, uint32_t sequence_length,
positions_
.
emplace_back
(
begin
,
end
);
}
bool
Window
::
generate_consensus
(
std
::
shared_ptr
<
spoa
::
AlignmentEngine
>
alignment_engine
)
{
bool
Window
::
generate_consensus
(
std
::
shared_ptr
<
spoa
::
AlignmentEngine
>
alignment_engine
,
bool
trim
)
{
if
(
sequences_
.
size
()
<
3
)
{
consensus_
=
std
::
string
(
sequences_
.
front
().
first
,
sequences_
.
front
().
second
);
...
...
@@ -86,14 +87,14 @@ bool Window::generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment
spoa
::
Alignment
alignment
;
if
(
positions_
[
i
].
first
<
offset
&&
positions_
[
i
].
second
>
sequences_
.
front
().
second
-
offset
)
{
alignment
=
alignment_engine
->
align
_
sequence
_with_graph
(
sequences_
[
i
].
first
,
sequences_
[
i
].
second
,
graph
);
alignment
=
alignment_engine
->
align
(
sequence
s_
[
i
].
first
,
sequences_
[
i
].
second
,
graph
);
}
else
{
std
::
vector
<
int32_t
>
mapping
;
auto
subgraph
=
graph
->
subgraph
(
positions_
[
i
].
first
,
positions_
[
i
].
second
,
mapping
);
alignment
=
alignment_engine
->
align
_
sequence
_with_graph
(
sequences_
[
i
].
first
,
sequences_
[
i
].
second
,
subgraph
);
alignment
=
alignment_engine
->
align
(
sequence
s_
[
i
].
first
,
sequences_
[
i
].
second
,
subgraph
);
subgraph
->
update_alignment
(
alignment
,
mapping
);
}
...
...
@@ -110,7 +111,7 @@ bool Window::generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment
std
::
vector
<
uint32_t
>
coverages
;
consensus_
=
graph
->
generate_consensus
(
coverages
);
if
(
type_
==
WindowType
::
kTGS
)
{
if
(
type_
==
WindowType
::
kTGS
&&
trim
)
{
uint32_t
average_coverage
=
(
sequences_
.
size
()
-
1
)
/
2
;
int32_t
begin
=
0
,
end
=
consensus_
.
size
()
-
1
;
...
...
src/window.hpp
View file @
63959b12
...
...
@@ -44,7 +44,8 @@ public:
return
consensus_
;
}
bool
generate_consensus
(
std
::
shared_ptr
<
spoa
::
AlignmentEngine
>
alignment_engine
);
bool
generate_consensus
(
std
::
shared_ptr
<
spoa
::
AlignmentEngine
>
alignment_engine
,
bool
trim
);
void
add_layer
(
const
char
*
sequence
,
uint32_t
sequence_length
,
const
char
*
quality
,
uint32_t
quality_length
,
uint32_t
begin
,
...
...
test/racon_test.cpp
View file @
63959b12
...
...
@@ -33,7 +33,7 @@ public:
bool
cuda_banded_alignment
=
false
,
uint32_t
cudaaligner_batches
=
0
)
{
polisher
=
racon
::
createPolisher
(
sequences_path
,
overlaps_path
,
target_path
,
type
,
window_length
,
quality_threshold
,
error_threshold
,
match
,
type
,
window_length
,
quality_threshold
,
error_threshold
,
true
,
match
,
mismatch
,
gap
,
4
,
cuda_batches
,
cuda_banded_alignment
,
cudaaligner_batches
);
}
...
...
@@ -54,25 +54,25 @@ public:
TEST
(
RaconInitializeTest
,
PolisherTypeError
)
{
EXPECT_DEATH
((
racon
::
createPolisher
(
""
,
""
,
""
,
static_cast
<
racon
::
PolisherType
>
(
3
),
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: invalid polisher"
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: invalid polisher"
" type!"
);
}
TEST
(
RaconInitializeTest
,
WindowLengthError
)
{
EXPECT_DEATH
((
racon
::
createPolisher
(
""
,
""
,
""
,
racon
::
PolisherType
::
kC
,
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: invalid window length!"
);
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: invalid window length!"
);
}
TEST
(
RaconInitializeTest
,
SequencesPathExtensionError
)
{
EXPECT_DEATH
((
racon
::
createPolisher
(
""
,
""
,
""
,
racon
::
PolisherType
::
kC
,
500
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: file has unsupported "
"format extension .valid extensions: .fasta, .fasta.gz, .fa, .fa.gz, "
".fastq, .fastq.gz, .fq, .fq.gz.!"
);
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: file has unsupported "
"format extension .valid extensions: .fasta, .fasta.gz, .f
n
a, .f
n
a.gz, "
"
.fa, .fa.gz,
.fastq, .fastq.gz, .fq, .fq.gz.!"
);
}
TEST
(
RaconInitializeTest
,
OverlapsPathExtensionError
)
{
EXPECT_DEATH
((
racon
::
createPolisher
(
racon_test_data_path
+
"sample_reads.fastq.gz"
,
""
,
""
,
racon
::
PolisherType
::
kC
,
500
,
0
,
0
,
0
,
0
,
0
,
0
)),
""
,
""
,
racon
::
PolisherType
::
kC
,
500
,
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: file has unsupported format extension "
".valid extensions: .mhap, .mhap.gz, .paf, .paf.gz, .sam, .sam.gz.!"
);
}
...
...
@@ -80,9 +80,9 @@ TEST(RaconInitializeTest, OverlapsPathExtensionError) {
TEST
(
RaconInitializeTest
,
TargetPathExtensionError
)
{
EXPECT_DEATH
((
racon
::
createPolisher
(
racon_test_data_path
+
"sample_reads.fastq.gz"
,
racon_test_data_path
+
"sample_overlaps.paf.gz"
,
""
,
racon
::
PolisherType
::
kC
,
500
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: file has "
"unsupported format extension .valid extensions: .fasta, .fasta.gz, .fa,"
" .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz.!"
);
500
,
0
,
0
,
0
,
0
,
0
,
0
,
0
)),
".racon::createPolisher. error: file has "
"unsupported format extension .valid extensions: .fasta, .fasta.gz, .f
n
a,
"
"
.fna.gz, .fa,
.fa.gz, .fastq, .fastq.gz, .fq, .fq.gz.!"
);
}
TEST_F
(
RaconPolishingTest
,
ConsensusWithQualities
)
{
...
...