Skip to content
Commits on Source (3)
......@@ -3,11 +3,17 @@ MAINTAINER Matei David <matei.david.at.oicr.on.ca>
ARG DEBIAN_FRONTEND=noninteractive
# install prerequisites
RUN apt-get update && \
RUN for i in 1 2 3; do \
apt-get update \
&& break; sleep 1; \
done && \
for i in 1 2 3; do \
apt-get install -y \
build-essential \
cmake \
libhdf5-dev
libhdf5-dev \
&& break; sleep 1; \
done
# if necessary, specify compiler
#RUN apt-get install -y g++-4.9 g++-5 g++-6
......
......@@ -2,7 +2,7 @@
*** Nanocall: An Oxford Nanopore Basecaller
[[http://travis-ci.org/mateidavid/nanocall][http://travis-ci.org/mateidavid/nanocall.svg?branch=master]]
[[http://travis-ci.org/mateidavid/nanocall][http://travis-ci.org/mateidavid/nanocall.svg?branch=master]] [[https://tldrlegal.com/license/mit-license][http://img.shields.io/:license-mit-blue.svg]]
**** Installation
......
nanocall (0.7.4-1) UNRELEASED; urgency=medium
* New upstream release - throws same compile error as 0.6.14 which is not the same as reported
in that issue 11:
In file included from /home/moeller/git/nanocall/src/nanocall/nanocall.cpp:10:0:
/home/moeller/git/nanocall/src/nanocall/Pore_Model.hpp:99:48: error: ‘Model_Entry’ in namespace ‘fast5’ does not name a type
Pore_Model_State& operator = (const fast5::Model_Entry& e)
^~~~~~~~~~~
compilation terminated due to -fmax-errors=1.
-- Steffen Moeller <moeller@debian.org> Mon, 30 Apr 2018 23:39:17 +0200
nanocall (0.6.14-1) UNRELEASED; urgency=medium
* Initial release (Closes: #<bug>)
......
Author: Andreas Tille <tille@debian.org>
Last-Update: Tue, 13 Sep 2016 14:28:24 +0200
Description: Delete Makefile which does more harm than good - try cmake instead
--- a/src/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-SHELL := /bin/bash
-
-HDF_ROOT = /usr
-
-OPT_FLAG = -O0
-CXXFLAGS = -Wall -Wextra -pedantic -g3 -ggdb -fno-eliminate-unused-debug-types ${OPT_FLAG}
-M_CXXFLAGS = -std=c++11 -pthread
-CPPFLAGS = -isystem ${HDF_ROOT}/include -I fast5/src -I tclap/include -I hpptools/include
-
-TARGETS = compute-state-transitions compute-scaled-pore-model run-fwbw run-viterbi nanocall
-
-.PHONY: all test clean
-
-all: ${TARGETS}
-
-clean:
- rm -rf ${TARGETS}
-
-compute-state-transitions: compute-state-transitions.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
-
-compute-scaled-pore-model: compute-scaled-pore-model.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5
-
-run-fwbw: run-fwbw.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
-
-run-viterbi: run-viterbi.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
-
-nanocall: nanocall.cpp Builtin_Model.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5 -lz
-
-list-directory: list-directory.cpp
- ${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
remove_broken_makefile.patch
use_debian_packaged_libs.patch
no_rpath.patch
tclap.patch
circumvent_compile_error.patch
SHELL := /bin/bash
HDF_ROOT = /usr
OPT_FLAG = -O0
CXXFLAGS = -Wall -Wextra -pedantic -g3 -ggdb -fno-eliminate-unused-debug-types ${OPT_FLAG}
M_CXXFLAGS = -std=c++11 -pthread
CPPFLAGS = -isystem ${HDF_ROOT}/include -I fast5/src -I tclap/include -I hpptools/include
TARGETS = compute-state-transitions compute-scaled-pore-model run-fwbw run-viterbi nanocall
.PHONY: all test clean
all: ${TARGETS}
clean:
rm -rf ${TARGETS}
compute-state-transitions: compute-state-transitions.cpp
${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
compute-scaled-pore-model: compute-scaled-pore-model.cpp
${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5
run-fwbw: run-fwbw.cpp
${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
run-viterbi: run-viterbi.cpp
${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -lz
nanocall: nanocall.cpp Builtin_Model.cpp
${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS} -L ${HDF_ROOT}/lib -lhdf5 -lz
list-directory: list-directory.cpp
${CXX} ${M_CXXFLAGS} ${CXXFLAGS} ${CPPFLAGS} $^ -o $@ ${LDFLAGS}
......@@ -22,7 +22,7 @@ public:
Float_Type log_mean;
Float_Type log_corrected_mean;
Float_Type log_stdv;
Float_Type log_start;
//Float_Type log_start;
//
Float_Type orig_mean;
Float_Type p_model_state;
......@@ -32,10 +32,16 @@ public:
//
void update_logs()
{
assert(mean > 0);
log_mean = std::log(mean);
assert(corrected_mean > 0);
log_corrected_mean = std::log(corrected_mean);
if (stdv == 0.0)
{
stdv = 0.01;
}
log_stdv = std::log(stdv);
log_start = std::log(start);
//log_start = std::log(start);
}
void set_model_state(const std::string& s)
{
......
......@@ -73,7 +73,7 @@ public:
static unsigned& min_ed_events()
{
static unsigned _min_ed_events = 1000;
static unsigned _min_ed_events = 10;
return _min_ed_events;
}
......@@ -117,6 +117,20 @@ public:
return _hairpin_island_window_load;
}
// if set, do not split strands
static unsigned& template_only()
{
static unsigned _template_only = 0;
return _template_only;
}
// trim margins: after start, before end, before hairpin start, after hairpin end
static std::array< unsigned, 4 >& trim_margins()
{
static std::array< unsigned, 4 > _trim_margins = {{ 50u, 50u, 50u, 50u }};
return _trim_margins;
}
Fast5_Summary() : valid(false) {}
Fast5_Summary(const std::string fn, const Pore_Model_Dict_Type& models, bool sst)
: valid(false) { summarize(fn, models, sst); }
......@@ -167,17 +181,11 @@ public:
{
read_id = ed_params.read_id;
}
load_ed_events(&f);
num_ed_events = ed_events().size();
if (num_ed_events < 100 + min_ed_events())
{
LOG("Fast5_Summary", info) << file_name << ": not enough eventdetection events: " << num_ed_events << std::endl;
num_ed_events = 0;
break;
}
if (num_ed_events > max_ed_events())
load_ed_events(&f); // also sets num_ed_events
if (num_ed_events < trim_margins()[0] + trim_margins()[1] + min_ed_events())
{
LOG("Fast5_Summary", info) << file_name << ": too many eventdetection events: " << num_ed_events << std::endl;
LOG("Fast5_Summary", info)
<< file_name << ": not enough eventdetection events: " << num_ed_events << std::endl;
num_ed_events = 0;
break;
}
......@@ -185,12 +193,14 @@ public:
abasic_level = detect_abasic_level();
if (abasic_level <= 1.0)
{
LOG("Fast5_Summary", info) << file_name << ": abasic level too low: " << abasic_level << std::endl;
LOG("Fast5_Summary", info)
<< file_name << ": abasic level too low: " << abasic_level << std::endl;
num_ed_events = 0;
break;
}
// detect strands
detect_strands();
strand_bounds = {{ trim_margins()[0], num_ed_events - trim_margins()[1], 0, 0 }};
if (not template_only()) detect_strands();
if (strand_bounds[1] <= strand_bounds[0])
{
LOG("Fast5_Summary", info) << file_name << ": no template strand detected" << std::endl;
......@@ -497,15 +507,26 @@ private:
ed_events_ptr = decltype(ed_events_ptr)(
new typename decltype(ed_events_ptr)::element_type(
f_p->get_eventdetection_events(eventdetection_group())));
if (num_ed_events == 0)
{
if (ed_events().size() > max_ed_events())
{
LOG("Fast5_Summary", info)
<< file_name << ": using only " << max_ed_events()
<< " of " << ed_events().size() << " events" << std::endl;
num_ed_events = max_ed_events();
}
else
{
num_ed_events = ed_events().size();
}
}
ed_events().resize(num_ed_events);
}
// crude detection of abasic level
Float_Type detect_abasic_level()
{
if (ed_events().size() < min_ed_events())
{
return 0.0;
}
//
// exclude top abasic_level_top_percent() levels
// add abasic_level_top_offset()
......@@ -631,11 +652,6 @@ private:
// crude detection of strands in event sequence
void detect_strands()
{
if (ed_events().size() < 100u)
{
return;
}
strand_bounds = { { 50, static_cast< unsigned >(ed_events().size() - 50), 0, 0 } };
LOG("Fast5_Summary", debug)
<< "num_events=" << ed_events().size()
<< " abasic_level=" << abasic_level << std::endl;
......@@ -648,7 +664,7 @@ private:
//
for (unsigned i = 1; i < islands.size(); ++i)
{
if (islands[i - 1].second + 50 >= islands[i].first)
if (islands[i - 1].second + std::max(trim_margins()[2], trim_margins()[3]) >= islands[i].first)
{
LOG("Fast5_Summary", debug) << "merge_islands "
<< "[" << islands[i - 1].first << "," << islands[i - 1].second << "] with "
......@@ -699,15 +715,15 @@ private:
{
LOG("Fast5_Summary", debug)
<< "hairpin_island [" << it->first << "," << it->second << "]" << std::endl;
strand_bounds[0] = 50;
if (islands[0].first < 100)
strand_bounds[0] = trim_margins()[0];
if (islands[0].first < trim_margins()[0] + trim_margins()[2])
{
strand_bounds[0] = std::max(strand_bounds[0], islands[0].second);
}
strand_bounds[1] = it->first - 50;
strand_bounds[2] = it->first + 50;
strand_bounds[3] = ed_events().size() - 50;
if (islands[islands.size() - 1].second > ed_events().size() - 100)
strand_bounds[1] = it->first - trim_margins()[2];
strand_bounds[2] = it->first + trim_margins()[3];
strand_bounds[3] = ed_events().size() - trim_margins()[1];
if (islands[islands.size() - 1].second > ed_events().size() - (trim_margins()[3] + trim_margins()[1]))
{
strand_bounds[3] = std::min(strand_bounds[3], islands[islands.size() - 1].first);
}
......
......@@ -62,6 +62,12 @@ struct Parameter_Trainer
return _st_train_kmers;
}
static unsigned& pm_train_drift()
{
static unsigned _pm_train_drift = 1;
return _pm_train_drift;
}
/**
* Struct used for training rounds.
* @event_seq_ptr_v Vector of pairs, first: an event sequence, second: strand from which it comes
......@@ -290,13 +296,16 @@ struct Parameter_Trainer
} // for j
A[0][0] += s[0];
A[0][1] += s[1];
A[0][2] += s[0] * t_i;
A[1][1] += s[2];
A[1][2] += s[1] * t_i;
A[2][2] += s[0] * t_i * t_i;
B[0] += s[0] * x_i;
B[1] += s[1] * x_i;
if (pm_train_drift())
{
A[0][2] += s[0] * t_i;
A[1][2] += s[1] * t_i;
A[2][2] += s[0] * t_i * t_i;
B[2] += s[0] * x_i * t_i;
}
D += s[0] * x_i * x_i;
V_numer += l[2] * y_i;
V_denom += l[1];
......@@ -306,6 +315,10 @@ struct Parameter_Trainer
A[1][0] = A[0][1];
A[2][0] = A[0][2];
A[2][1] = A[1][2];
if (not pm_train_drift())
{
A[2][2] = 1.0;
}
auto A_copy = A;
auto B_copy = B;
// compute scaling vector used for scaled partial pivoting
......@@ -384,7 +397,7 @@ struct Parameter_Trainer
double x = (A_copy[i][0] * a_hat
+ A_copy[i][1] * b_hat
+ A_copy[i][2] * c_hat);
ASSERT((x - B_copy[i])/std::max(x, B_copy[i]) < 1e-3);
ASSERT((x - B_copy[i])/std::max(x, B_copy[i]) < pm_train_drift()? 1e-3 : 1e-2);
}
#endif
//
......
......@@ -57,6 +57,11 @@ namespace opts
ValueArg< unsigned > chunk_size("", "chunk-size", "Thread chunk size.", false, 1, "int", cmd_parser);
MultiArg< string > log_level("", "log", "Log level. (default: info)", false, "string", cmd_parser);
ValueArg< string > stats_fn("", "stats", "Stats.", false, "", "file", cmd_parser);
ValueArg< string > train_drift("", "train-drift", "Train drift parameter. (default: yes for R73, no for R9)", false, "", "0|1", cmd_parser);
ValueArg< unsigned > trim_ed_hp_end("", "trim-ed-hp-end", "Number of events to trim after hairpin end.", false, 50, "int", cmd_parser);
ValueArg< unsigned > trim_ed_hp_start("", "trim-ed-hp-start", "Number of events to trim before hairpin start.", false, 50, "int", cmd_parser);
ValueArg< unsigned > trim_ed_sq_end("", "trim-ed-sq-end", "Number of events to trim before sequence end.", false, 50, "int", cmd_parser);
ValueArg< unsigned > trim_ed_sq_start("", "trim-ed-sq-start", "Number of events to trim after sequence start.", false, 50, "int", cmd_parser);
ValueArg< unsigned > max_ed_events("", "max-ed-events", "Maximum EventDetection events.", false, 100000, "int", cmd_parser);
ValueArg< unsigned > min_ed_events("", "min-ed-events", "Minimum EventDetection events.", false, 10, "int", cmd_parser);
ValueArg< unsigned > fasta_line_width("", "fasta-line-width", "Maximum fasta line width.", false, 80, "int", cmd_parser);
......@@ -66,6 +71,7 @@ namespace opts
ValueArg< unsigned > scaling_max_rounds("", "scaling-max-rounds", "Maximum scaling rounds.", false, 10, "int", cmd_parser);
ValueArg< unsigned > scaling_num_events("", "scaling-num-events", "Number of events used for model scaling.", false, 200, "int", cmd_parser);
//
SwitchArg template_only("", "1d", "Interpret entire read as 1D template only.", cmd_parser);
SwitchArg single_strand_scaling("", "single-strand-scaling", "Train scaling parameters per strand.", cmd_parser);
SwitchArg double_strand_scaling("", "double-strand-scaling", "Train scaling parameters per read. (default)", cmd_parser);
SwitchArg no_train_transitions("", "no-train-transitions", "Do not train state transitions.", cmd_parser);
......@@ -919,18 +925,31 @@ int main(int argc, char * argv[])
Fast5_Summary_Type::min_ed_events() = opts::min_ed_events;
Fast5_Summary_Type::max_ed_events() = opts::max_ed_events;
Fast5_Summary_Type::eventdetection_group() = opts::ed_group;
Fast5_Summary_Type::template_only() = opts::template_only;
Fast5_Summary_Type::trim_margins() = {{ opts::trim_ed_sq_start, opts::trim_ed_sq_end, opts::trim_ed_hp_start, opts::trim_ed_hp_end }};
LOG (info) << "eventdetection_group=" << (Fast5_Summary_Type::eventdetection_group().empty()
? string("smallest")
: Fast5_Summary_Type::eventdetection_group()) << endl;
//
// set pore-related options
//
if (not opts::train_drift.get().empty()
and opts::train_drift.get() != "0"
and opts::train_drift.get() != "1")
{
LOG(error) << "train-drift not understdood: " << opts::train_drift.get() << endl;
return EXIT_FAILURE;
}
if (opts::pore.get() == "r9")
{
Fast5_Summary_Type::abasic_level_top_percent() = 1.0;
Fast5_Summary_Type::abasic_level_top_offset() = 0.0;
Fast5_Summary_Type::hairpin_island_window_size() = 10;
Fast5_Summary_Type::hairpin_island_window_load() = 5;
if (opts::train_drift.get().empty())
{
opts::train_drift.get() = "0";
}
}
else if (opts::pore.get() == "r73")
{
......@@ -938,12 +957,25 @@ int main(int argc, char * argv[])
Fast5_Summary_Type::abasic_level_top_offset() = 5.0;
Fast5_Summary_Type::hairpin_island_window_size() = 5;
Fast5_Summary_Type::hairpin_island_window_load() = 5;
if (opts::train_drift.get().empty())
{
opts::train_drift.get() = "1";
}
}
else
{
LOG(error) << "unknown pore type: " << opts::pore.get() << endl;
return EXIT_FAILURE;
}
Parameter_Trainer_Type::pm_train_drift() = opts::train_drift.get() == "1";
LOG(info)
<< "ed_event_trimming: "
<< " sq_start=" << Fast5_Summary_Type::trim_margins()[0]
<< " sq_end=" << Fast5_Summary_Type::trim_margins()[1]
<< " hp_start=" << Fast5_Summary_Type::trim_margins()[2]
<< " hp_end=" << Fast5_Summary_Type::trim_margins()[3] << endl;
if (not opts::template_only.get())
{
LOG(info)
<< "hairpin_detection:"
<< " abasic_level_top_percent=" << Fast5_Summary_Type::abasic_level_top_percent()
......@@ -951,6 +983,12 @@ int main(int argc, char * argv[])
<< " hairpin_island_window_size=" << Fast5_Summary_Type::hairpin_island_window_size()
<< " hairpin_island_window_load=" << Fast5_Summary_Type::hairpin_island_window_load()
<< endl;
}
else
{
LOG(info)
<< "hairpin_detection: disabled" << endl;
}
//
// set training option
//
......@@ -1034,6 +1072,7 @@ int main(int argc, char * argv[])
LOG(info) << "scaling_max_rounds=" << opts::scaling_max_rounds.get() << endl;
LOG(info) << "scaling_min_progress=" << opts::scaling_min_progress.get() << endl;
LOG(info) << "scaling_select_threshold=" << opts::scaling_select_threshold.get() << endl;
LOG(info) << "train_drift=" << opts::train_drift.get() << endl;
}
}
LOG(info) << "basecall=" << opts::basecall.get() << endl;
......