Commits on Source (3)
......@@ -13,17 +13,4 @@ script:
- sudo make install
- megahit --test
- megahit --test --kmin-1pass
- megahit --test --no-hw-accel
after_success:
# Create lcov report
- wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
- tar zvxf lcov-1.14.tar.gz
- export PATH=lcov-1.14/bin/:${PATH}
- lcov --capture --directory . --output-file coverage.info
- lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
- lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
- lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
- lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
- lcov --list coverage.info # debug info
# Uploading report to CodeCov
- bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports"
\ No newline at end of file
- megahit --test --no-hw-accel
### 1.2.9 / 2019-10-13
- Fix segfault triggered by length-zero sequences
- Fix memory detection problem for some outdated MacOS versions
- Fix an incorrect assertion in unitig graph refreshing
- Add `--verbose` to output the full log to the screen
### 1.2.8 / 2019-08-10
- Add intermediate `megahit_core_popcnt` for CPUs that have ABM but not BMI2
- Allow new assembly task with `--continue`
......
......@@ -101,9 +101,11 @@ add_custom_target(
simple_test
COMMAND ./megahit --test -t 2
COMMAND MEGAHIT_NUM_MERCY_FACTOR=1.5 ./megahit --test -t 4 --mem-flag 0 --no-hw-accel
COMMAND ./megahit --test -t 2 --kmin-1pass
COMMAND rm -rf test-random && python3 ../test_data/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND ./megahit --test -t 2 --kmin-1pass --prune-level 3 --prune-depth 0
COMMAND rm -rf test-random && python3 ${TEST_DATA}/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND rm -rf test-fastg && ./megahit --test -t 2 --mem-flag 2 --keep-tmp-files -o test-fastg
COMMAND rm -rf test-empty && ./megahit -r ${TEST_DATA}/empty.fa -o test-empty
COMMAND rm -rf test-no-contig && ./megahit -r ${TEST_DATA}/r4.fa -o test-no-contig
COMMAND ./megahit_toolkit contig2fastg 59 test-fastg/intermediate_contigs/k59.contigs.fa > 59.fastg
COMMAND ./megahit_toolkit readstat < test-fastg/intermediate_contigs/k59.contigs.fa
)
......
......@@ -19,9 +19,9 @@ conda install -c bioconda megahit
### Pre-built binaries for x86_64 Linux
```sh
wget https://github.com/voutcn/megahit/releases/download/v1.2.8/MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
tar zvxf MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
cd MEGAHIT-1.2.8-Linux-x86_64-static/bin/
wget https://github.com/voutcn/megahit/releases/download/v1.2.9/MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
tar zvxf MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
cd MEGAHIT-1.2.9-Linux-x86_64-static/bin/
./megahit --test # run on a toy dataset
./megahit -1 MY_PE_READ_1.fq.gz -2 MY_PE_READ_2.fq.gz -o MY_OUTPUT_DIR
```
......
jobs:
- job: ubuntu_1604
pool:
vmImage: 'Ubuntu-16.04'
strategy:
matrix:
python36:
python.version: '3.6'
build.type: 'Debug'
sanitizer: 'ON'
static: 'OFF'
Python27:
python.version: '2.7'
build.type: 'Release'
sanitizer: 'OFF'
static: 'ON'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
addToPath: true
- script: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DSTATIC_BUILD=$(static) ..
make simple_test -j `nproc`
displayName: 'build and test'
- job: macos
strategy:
matrix:
1013:
image: macos-10.13
latest:
image: macos-latest
pool:
vmImage: $(image)
steps:
- script: |
brew install cmake gcc@9 zlib bzip2
displayName: 'install dependencies'
- script: |
mkdir build
cd build
CC=gcc-9 CXX=g++-9 cmake ..
make simple_test -j `sysctl -n hw.physicalcpu`
displayName: 'build and test'
- job: assembly
timeoutInMinutes: 0
strategy:
matrix:
codecov:
build.type: 'Release'
sanitizer: 'OFF'
coverage: 'ON'
sanitize:
build.type: 'Debug'
sanitizer: 'ON'
coverage: 'OFF'
pool:
vmImage: 'Ubuntu-16.04'
steps:
- script: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DCOVERAGE=$(coverage) ..
make -j `nproc`
make simple_test
sudo make install
displayName: 'build and test'
- script: |
curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_1.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 1.fq.gz
curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_2.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 2.fq.gz
megahit --presets meta-large -1 1.fq.gz -2 2.fq.gz -m5e9 --verbose
displayName: 'assemble'
- script: |
if [ $(coverage) = 'ON' ]; then
wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
tar zvxf lcov-1.14.tar.gz
export PATH=lcov-1.14/bin/:${PATH}
lcov --capture --directory . --output-file coverage.info
lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
lcov --list coverage.info # debug info
bash <(curl -s https://codecov.io/bash) -f coverage.info -t $(CODECOV_TOKEN) || echo "Codecov did not collect coverage reports"
fi
displayName: 'codecov'
coverage:
status:
patch:
default:
target: 0%
project:
default:
target: 0%
\ No newline at end of file
megahit (1.2.8-1) UNRELEASED; urgency=medium
megahit (1.2.9-1) UNRELEASED; urgency=medium
* Initial release (Closes: #<bug>)
* New upstream release
-- Shayan Doust <hello@shayandoust.me> Mon, 07 Oct 2019 23:03:56 +0100
-- Shayan Doust <shayandoust@debian> Tue, 22 Oct 2019 13:15:38 +0100
......@@ -5,7 +5,8 @@ Section: science
Priority: optional
Build-Depends: debhelper-compat (= 12),
cmake,
zlib1g-dev
zlib1g-dev,
g++
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/megahit
Vcs-Git: https://salsa.debian.org/med-team/megahit.git
......
......@@ -3,10 +3,10 @@ Author: Shayan Doust <hello@shayandoust.me>
Last-Update: 2019-10-11
---
Index: megahit/src/megahit
Index: megahit-1.2.9/src/megahit
===================================================================
--- megahit.orig/src/megahit 2019-10-11 17:11:14.912761645 +0100
+++ megahit/src/megahit 2019-10-11 17:11:14.908761605 +0100
--- megahit-1.2.9.orig/src/megahit 2019-10-22 13:15:58.504338175 +0100
+++ megahit-1.2.9/src/megahit 2019-10-22 13:15:58.500338190 +0100
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
......@@ -22,7 +22,7 @@ Index: megahit/src/megahit
import getopt
import json
@@ -373,7 +373,7 @@
@@ -375,7 +375,7 @@
elif option == '--max-tip-len':
opt.max_tip_len = int(value)
elif option == '--merge-level':
......
......@@ -6,15 +6,15 @@ Last-Update: 2019-10-11
Index: megahit/src/sequence/kmer.h
===================================================================
--- megahit.orig/src/sequence/kmer.h 2019-10-11 17:00:20.754408247 +0100
+++ megahit/src/sequence/kmer.h 2019-10-11 17:00:20.750408208 +0100
--- megahit.orig/src/sequence/kmer.h 2019-10-21 11:34:23.623134886 +0100
+++ megahit/src/sequence/kmer.h 2019-10-21 11:34:51.959024968 +0100
@@ -113,7 +113,9 @@
}
for (unsigned i = 0; i + i < used_words; ++i) {
- std::swap(data_[i], data_[used_words - 1 - i]);
+ auto a = data_[i];
+ auto b = data_[used_words - 1 - i];
+ Kmer<1, long unsigned int>::word_type a = data_[i];
+ Kmer<1, long unsigned int>::word_type b = data_[used_words - 1 - i];
+ std::swap(a, b);
}
......
......@@ -4,11 +4,11 @@ Author: Shayan Doust <hello@shayandoust.me>
Last-Update: 2019-10-15
---
Index: megahit/src/megahit
Index: megahit-1.2.9/src/megahit
===================================================================
--- megahit.orig/src/megahit 2019-10-15 06:06:49.207438632 +0100
+++ megahit/src/megahit 2019-10-15 06:06:49.203438593 +0100
@@ -566,14 +566,9 @@
--- megahit-1.2.9.orig/src/megahit 2019-10-22 13:15:53.688356856 +0100
+++ megahit-1.2.9/src/megahit 2019-10-22 13:15:53.680356887 +0100
@@ -570,14 +570,9 @@
def find_test_data_path():
......
......@@ -16,6 +16,5 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len,
uint32_t IterateLocalLowDepth(UnitigGraph &graph, double min_depth,
uint32_t min_len, uint32_t local_width,
double local_ratio, bool permanent_rm = false);
uint32_t RemoveLowDepth(UnitigGraph &graph, double min_depth);
#endif // MEGAHIT_LOW_DEPTH_REMOVER_H
......@@ -312,7 +312,6 @@ void UnitigGraph::Refresh(bool set_changed) {
while (true) {
next_adapter = NextSimplePathAdapter(next_adapter);
assert(next_adapter.IsValid());
assert(!(next_adapter.GetFlag() & kDeleted));
if (next_adapter.b() == adapter.b()) {
break;
}
......
......@@ -25,7 +25,7 @@
#include <stdint.h>
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "v1.2.8"
#define PACKAGE_VERSION "v1.2.9"
#endif
#include "sdbg/sdbg_def.h"
......
......@@ -224,9 +224,11 @@ void MapToContigs(const HashMapper &mapper,
void AssembleAndOutput(const HashMapper &mapper, const SeqPackage &read_pkg,
MappingResultCollector &result_collector,
const std::string &output_file, int32_t local_range,
const std::string &output_file,
const int32_t local_range,
const LocalAsmOption &opt) {
size_t min_num_reads = local_range / read_pkg.max_length();
const size_t min_num_reads = read_pkg.max_length() > 0 ?
local_range / read_pkg.max_length() : 1;
xinfo("Minimum number of reads to do local assembly: {}\n", min_num_reads);
Sequence seq, contig_end;
......
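The `AssembleAndOutput` change above guards the `min_num_reads` computation against a zero maximum read length (an empty read set, as exercised by the new `test-empty` target in `simple_test`). A minimal standalone sketch of the same guard, using made-up values rather than the real `SeqPackage`:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const int32_t local_range = 1000;  // assumed local-assembly window size
  const size_t max_read_length = 0;  // empty read set: the problematic case
  // Without the check, local_range / max_read_length would divide by zero.
  const size_t min_num_reads =
      max_read_length > 0 ? local_range / max_read_length : 1;
  std::printf("min reads for local assembly: %zu\n", min_num_reads);
  return 0;
}
```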
......@@ -195,6 +195,7 @@ class Options:
self.pe12 = []
self.se = []
self.presets = ''
self.verbose = False
@property
def log_file_name(self):
......@@ -321,6 +322,7 @@ def parse_option(argv):
'mem-flag=',
'continue',
'version',
'verbose',
'out-prefix=',
'presets=',
'test',
......@@ -398,6 +400,8 @@ def parse_option(argv):
elif option in ('-v', '--version'):
print(software_info.megahit_version)
exit(0)
elif option == '--verbose':
opt.verbose = True
elif option == '--continue':
opt.continue_mode = True
elif option == '--out-prefix':
......@@ -591,11 +595,19 @@ def check_reads():
def detect_available_mem():
try:
psize = os.sysconf('SC_PAGE_SIZE')
pcount = os.sysconf('SC_PHYS_PAGES')
if psize < 0 or pcount < 0:
raise SystemError
return psize * pcount
except ValueError:
if sys.platform.find("darwin") != -1:
return int(float(os.popen("sysctl hw.memsize").readlines()[0].split()[1]))
elif sys.platform.find("linux") != -1:
return int(float(os.popen("free").readlines()[1].split()[1]) * 1024)
else:
raise
def cpu_dispatch():
......@@ -926,6 +938,8 @@ def merge_final(final_k):
def run_sub_command(cmd, msg, verbose=False):
if opt.verbose:
verbose = True
logger.info(msg)
logger.debug('command %s' % ' '.join(cmd))
......
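For reference, the memory-detection fallback added to the Python driver above (try `sysconf` first, then `sysctl hw.memsize` on macOS or `free` on Linux) can be sketched at the C level roughly as follows. This is an illustrative, hypothetical snippet, not code from MEGAHIT:

```cpp
#include <cstdint>
#include <cstdio>
#include <unistd.h>
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

// Returns total physical memory in bytes, or -1 if it cannot be determined.
static int64_t DetectAvailableMem() {
  const long psize = sysconf(_SC_PAGE_SIZE);
  const long pcount = sysconf(_SC_PHYS_PAGES);
  if (psize > 0 && pcount > 0) {
    return static_cast<int64_t>(psize) * pcount;  // the common, portable path
  }
#ifdef __APPLE__
  // Some macOS setups do not report _SC_PHYS_PAGES; ask sysctl instead,
  // which is what the Python fallback shells out to.
  int64_t memsize = 0;
  size_t len = sizeof(memsize);
  if (sysctlbyname("hw.memsize", &memsize, &len, nullptr, 0) == 0) {
    return memsize;
  }
#endif
  return -1;
}

int main() {
  std::printf("detected memory: %lld bytes\n",
              static_cast<long long>(DetectAvailableMem()));
  return 0;
}
```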
......@@ -12,7 +12,8 @@
class BinaryReader : public BaseSequenceReader {
public:
explicit BinaryReader(const std::string &filename) : is_(filename) {
explicit BinaryReader(const std::string &filename)
: is_(filename), buf_(120) {
if (is_.bad()) {
throw std::invalid_argument("Failed to open file " + filename);
}
......@@ -33,14 +34,14 @@ class BinaryReader : public BaseSequenceReader {
if (buf_.size() < num_words) {
buf_.resize(num_words);
}
auto bytes_read = reader_.read(&buf_[0], num_words);
auto bytes_read = reader_.read(buf_.data(), num_words);
assert(bytes_read == num_words * sizeof(buf_[0]));
(void)(bytes_read);
if (!reverse) {
pkg->AppendCompactSequence(&buf_[0], read_len);
pkg->AppendCompactSequence(buf_.data(), read_len);
} else {
pkg->AppendReversedCompactSequence(&buf_[0], read_len);
pkg->AppendReversedCompactSequence(buf_.data(), read_len);
}
num_bases += read_len;
......
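The `BinaryReader` tweak above pre-sizes the buffer and reads through `std::vector::data()`. Presumably the point is that `&buf_[0]` is undefined behaviour on an empty vector, while `data()` is always safe to call; a tiny illustration with a hypothetical reader, not the real `BufferedReader`:

```cpp
#include <cstdio>
#include <vector>

// Stand-in for a buffered word reader; with count == 0 it must not touch dst.
static size_t ReadWords(unsigned long *dst, size_t count) {
  for (size_t i = 0; i < count; ++i) dst[i] = i;  // pretend to fill the buffer
  return count;
}

int main() {
  std::vector<unsigned long> buf;        // empty: &buf[0] would be UB here
  size_t n = ReadWords(buf.data(), 0);   // data() is well-defined even when empty
  buf.resize(120);                       // mirrors the buf_(120) pre-size
  n += ReadWords(buf.data(), buf.size());
  std::printf("words read: %zu\n", n);
  return 0;
}
```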
......@@ -22,7 +22,10 @@ class Kmer {
using word_type = TWord;
static const unsigned kNumWords = NWords;
Kmer() { std::memset(data_, 0, sizeof(data_)); }
Kmer() {
static_assert(sizeof(*this) == sizeof(TWord) * NWords, "");
std::memset(data_, 0, sizeof(data_));
}
Kmer(const Kmer &kmer) { std::memcpy(data_, kmer.data_, sizeof(data_)); }
......@@ -214,7 +217,7 @@ class Kmer {
private:
word_type data_[kNumWords];
} __attribute__((packed));
};
namespace std {
template <const unsigned NumWords, typename T>
......
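Dropping `__attribute__((packed))` from `Kmer` while adding the constructor `static_assert` keeps the layout guarantee (exactly `NWords` words, no padding) without binding references into a packed object, which is what the Debian `kmer.h` patch earlier in this set works around. A cut-down, hypothetical version of the pattern:

```cpp
#include <cstdint>
#include <cstring>

// Hypothetical miniature Kmer: the static_assert enforces at compile time
// that the word array is the entire object, replacing the layout guarantee
// previously implied by __attribute__((packed)).
template <unsigned NWords, typename TWord = uint64_t>
class TinyKmer {
 public:
  TinyKmer() {
    static_assert(sizeof(*this) == sizeof(TWord) * NWords,
                  "TinyKmer must contain no padding");
    std::memset(data_, 0, sizeof(data_));
  }
  TWord word(unsigned i) const { return data_[i]; }

 private:
  TWord data_[NWords];
};

int main() {
  TinyKmer<2> k;  // 2 x 64-bit words: sizeof(k) == 16
  return k.word(0) == 0 ? 0 : 1;
}
```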
......@@ -259,6 +259,12 @@ class SequencePackage {
}
void AppendStringSequence(const char *from, const char *to, unsigned len) {
if (len == 0) {
// Fake a sequence whose length is 1, as we need all sequences' length > 0
// to make `GetSeqID` work
auto fake_sequence = "A";
return AppendStringSequence(fake_sequence, fake_sequence + 1, 1);
}
UpdateLength(len);
std::ptrdiff_t step = from < to ? 1 : -1;
for (auto ptr = from; ptr != to; ptr += step) {
......@@ -267,7 +273,14 @@ class SequencePackage {
}
void AppendCompactSequence(const TWord *ptr, unsigned len, bool rev) {
if (len == 0) {
// Fake a sequence whose length is 1, as we need all sequences' length > 0
// to make `GetSeqID` work
TWord fake_sequence = 0;
return AppendCompactSequence(&fake_sequence, 1, false);
}
UpdateLength(len);
if (rev) {
auto rptr = ptr + DivCeiling(len, kBasesPerWord) - 1;
unsigned bases_in_last_word = len % kBasesPerWord;
......
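Both `SequencePackage` overloads above now pad a zero-length input with a single placeholder base so that every stored sequence has length >= 1, which the comments say `GetSeqID` depends on (and which matches the 1.2.9 changelog entry about length-zero sequences). A rough, self-contained sketch of why a length-0 entry breaks position-to-sequence lookups; the names here are invented, not the real `SequencePackage`:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical container that records cumulative end positions; a length-0
// entry would create two identical end positions and confuse lookups that
// map a base position back to a sequence id (the GetSeqID concern above).
class TinyPackage {
 public:
  void Append(std::string seq) {
    if (seq.empty()) seq = "A";            // pad, mirroring the diff's fix
    total_ += seq.size();
    ends_.push_back(total_);
    seqs_.push_back(std::move(seq));
  }
  // Assumes base_index < total number of stored bases.
  size_t GetSeqID(size_t base_index) const {
    size_t id = 0;
    while (ends_[id] <= base_index) ++id;  // strictly increasing thanks to padding
    return id;
  }

 private:
  std::vector<std::string> seqs_;
  std::vector<size_t> ends_;
  size_t total_ = 0;
};

int main() {
  TinyPackage pkg;
  pkg.Append("ACGT");
  pkg.Append("");   // would previously duplicate an end position
  pkg.Append("GG");
  std::printf("base 4 belongs to sequence %zu\n", pkg.GetSeqID(4));
  return 0;
}
```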
......@@ -218,7 +218,8 @@ void BaseSequenceSortingEngine::Lv0PrepareThreadPartition() {
int64_t average = meta_.num_sequences / n_threads_;
meta.seq_from = t * average;
meta.seq_to = t < n_threads_ - 1 ? (t + 1) * average : meta_.num_sequences;
meta.offset_base = Lv0EncodeDiffBase(meta.seq_from);
meta.offset_base = meta.seq_from < meta_.num_sequences ?
Lv0EncodeDiffBase(meta.seq_from) : std::numeric_limits<int64_t>::max();
}
for (unsigned i = 0; i < kNumBuckets; ++i) {
......
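Finally, the sorting-engine hunk above gives a thread whose partition starts past the last sequence a sentinel `offset_base` instead of calling `Lv0EncodeDiffBase` on a nonexistent sequence; with this partitioning scheme that situation arises when the input has zero sequences (the `empty.fa` case added to `simple_test`). A small hypothetical sketch of the partitioning with that guard:

```cpp
#include <cstdint>
#include <cstdio>
#include <limits>

// Hypothetical stand-in for Lv0EncodeDiffBase(); assume it must only be
// called with the index of a sequence that actually exists.
static int64_t OffsetBase(int64_t seq_id) { return seq_id * 100; }

int main() {
  const int64_t num_sequences = 0;  // empty input, e.g. the empty.fa test case
  const unsigned n_threads = 4;
  const int64_t average = num_sequences / n_threads;

  for (unsigned t = 0; t < n_threads; ++t) {
    const int64_t seq_from = t * average;
    const int64_t seq_to =
        t < n_threads - 1 ? (t + 1) * average : num_sequences;
    // With zero sequences every range is empty; the sentinel avoids asking
    // for the offset of a sequence that does not exist.
    const int64_t offset_base = seq_from < num_sequences
                                    ? OffsetBase(seq_from)
                                    : std::numeric_limits<int64_t>::max();
    std::printf("thread %u: [%lld, %lld) offset_base=%lld\n", t,
                static_cast<long long>(seq_from),
                static_cast<long long>(seq_to),
                static_cast<long long>(offset_base));
  }
  return 0;
}
```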