Commits on Source (3)
......@@ -13,17 +13,4 @@ script:
- sudo make install
- megahit --test
- megahit --test --kmin-1pass
- megahit --test --no-hw-accel
after_success:
# Create lcov report
- wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
- tar zvxf lcov-1.14.tar.gz
- export PATH=lcov-1.14/bin/:${PATH}
- lcov --capture --directory . --output-file coverage.info
- lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
- lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
- lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
- lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
- lcov --list coverage.info # debug info
# Uploading report to CodeCov
- bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports"
\ No newline at end of file
- megahit --test --no-hw-accel
### 1.2.9 / 2019-10-13
- Fix segfault triggered by length-zero sequences
- Fix memory detection problem for some outdated MacOS versions
- Fix an incorrect assertion in unitig graph refreshing
- Add `--verbose` to output the full log to the screen
### 1.2.8 / 2019-08-10
- Add intermediate `megahit_core_popcnt` for CPUs that have ABM but not BMI2
- Allow new assembly task with `--continue`
......
......@@ -101,9 +101,11 @@ add_custom_target(
simple_test
COMMAND ./megahit --test -t 2
COMMAND MEGAHIT_NUM_MERCY_FACTOR=1.5 ./megahit --test -t 4 --mem-flag 0 --no-hw-accel
COMMAND ./megahit --test -t 2 --kmin-1pass
COMMAND rm -rf test-random && python3 ../test_data/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND ./megahit --test -t 2 --kmin-1pass --prune-level 3 --prune-depth 0
COMMAND rm -rf test-random && python3 ${TEST_DATA}/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND rm -rf test-fastg && ./megahit --test -t 2 --mem-flag 2 --keep-tmp-files -o test-fastg
COMMAND rm -rf test-empty && ./megahit -r ${TEST_DATA}/empty.fa -o test-empty
COMMAND rm -rf test-no-contig && ./megahit -r ${TEST_DATA}/r4.fa -o test-no-contig
COMMAND ./megahit_toolkit contig2fastg 59 test-fastg/intermediate_contigs/k59.contigs.fa > 59.fastg
COMMAND ./megahit_toolkit readstat < test-fastg/intermediate_contigs/k59.contigs.fa
)
......
......@@ -19,9 +19,9 @@ conda install -c bioconda megahit
### Pre-built binaries for x86_64 Linux
```sh
wget https://github.com/voutcn/megahit/releases/download/v1.2.8/MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
tar zvxf MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
cd MEGAHIT-1.2.8-Linux-x86_64-static/bin/
wget https://github.com/voutcn/megahit/releases/download/v1.2.9/MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
tar zvxf MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
cd MEGAHIT-1.2.9-Linux-x86_64-static/bin/
./megahit --test # run on a toy dataset
./megahit -1 MY_PE_READ_1.fq.gz -2 MY_PE_READ_2.fq.gz -o MY_OUTPUT_DIR
```
......
jobs:
- job: ubuntu_1604
pool:
vmImage: 'Ubuntu-16.04'
strategy:
matrix:
python36:
python.version: '3.6'
build.type: 'Debug'
sanitizer: 'ON'
static: 'OFF'
Python27:
python.version: '2.7'
build.type: 'Release'
sanitizer: 'OFF'
static: 'ON'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
addToPath: true
- script: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DSTATIC_BUILD=$(static) ..
make simple_test -j `nproc`
displayName: 'build and test'
- job: macos
strategy:
matrix:
1013:
image: macos-10.13
latest:
image: macos-latest
pool:
vmImage: $(image)
steps:
- script: |
brew install cmake gcc@9 zlib bzip2
displayName: 'install dependencies'
- script: |
mkdir build
cd build
CC=gcc-9 CXX=g++-9 cmake ..
make simple_test -j `sysctl -n hw.physicalcpu`
displayName: 'build and test'
- job: assembly
timeoutInMinutes: 0
strategy:
matrix:
codecov:
build.type: 'Release'
sanitizer: 'OFF'
coverage: 'ON'
sanitize:
build.type: 'Debug'
sanitizer: 'ON'
coverage: 'OFF'
pool:
vmImage: 'Ubuntu-16.04'
steps:
- script: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DCOVERAGE=$(coverage) ..
make -j `nproc`
make simple_test
sudo make install
displayName: 'build and test'
- script: |
curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_1.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 1.fq.gz
curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_2.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 2.fq.gz
megahit --presets meta-large -1 1.fq.gz -2 2.fq.gz -m5e9 --verbose
displayName: 'assemble'
- script: |
if [ $(coverage) = 'ON' ]; then
wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
tar zvxf lcov-1.14.tar.gz
export PATH=lcov-1.14/bin/:${PATH}
lcov --capture --directory . --output-file coverage.info
lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
lcov --list coverage.info # debug info
bash <(curl -s https://codecov.io/bash) -f coverage.info -t $(CODECOV_TOKEN) || echo "Codecov did not collect coverage reports"
fi
displayName: 'codecov'
coverage:
status:
patch:
default:
target: 0%
project:
default:
target: 0%
\ No newline at end of file
megahit (1.2.8-1) UNRELEASED; urgency=medium
megahit (1.2.9-1) UNRELEASED; urgency=medium
* Initial release (Closes: #<bug>)
* New upstream release
-- Shayan Doust <hello@shayandoust.me> Mon, 07 Oct 2019 23:03:56 +0100
-- Shayan Doust <shayandoust@debian> Tue, 22 Oct 2019 13:15:38 +0100
......@@ -5,7 +5,8 @@ Section: science
Priority: optional
Build-Depends: debhelper-compat (= 12),
cmake,
zlib1g-dev
zlib1g-dev,
g++
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/megahit
Vcs-Git: https://salsa.debian.org/med-team/megahit.git
......
......@@ -3,10 +3,10 @@ Author: Shayan Doust <hello@shayandoust.me>
Last-Update: 2019-10-11
---
Index: megahit/src/megahit
Index: megahit-1.2.9/src/megahit
===================================================================
--- megahit.orig/src/megahit 2019-10-11 17:11:14.912761645 +0100
+++ megahit/src/megahit 2019-10-11 17:11:14.908761605 +0100
--- megahit-1.2.9.orig/src/megahit 2019-10-22 13:15:58.504338175 +0100
+++ megahit-1.2.9/src/megahit 2019-10-22 13:15:58.500338190 +0100
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
......@@ -22,7 +22,7 @@ Index: megahit/src/megahit
import getopt
import json
@@ -373,7 +373,7 @@
@@ -375,7 +375,7 @@
elif option == '--max-tip-len':
opt.max_tip_len = int(value)
elif option == '--merge-level':
......
......@@ -6,15 +6,15 @@ Last-Update: 2019-10-11
Index: megahit/src/sequence/kmer.h
===================================================================
--- megahit.orig/src/sequence/kmer.h 2019-10-11 17:00:20.754408247 +0100
+++ megahit/src/sequence/kmer.h 2019-10-11 17:00:20.750408208 +0100
--- megahit.orig/src/sequence/kmer.h 2019-10-21 11:34:23.623134886 +0100
+++ megahit/src/sequence/kmer.h 2019-10-21 11:34:51.959024968 +0100
@@ -113,7 +113,9 @@
}
for (unsigned i = 0; i + i < used_words; ++i) {
- std::swap(data_[i], data_[used_words - 1 - i]);
+ auto a = data_[i];
+ auto b = data_[used_words - 1 - i];
+ Kmer<1, long unsigned int>::word_type a = data_[i];
+ Kmer<1, long unsigned int>::word_type b = data_[used_words - 1 - i];
+ std::swap(a, b);
}
......
......@@ -4,11 +4,11 @@ Author: Shayan Doust <hello@shayandoust.me>
Last-Update: 2019-10-15
---
Index: megahit/src/megahit
Index: megahit-1.2.9/src/megahit
===================================================================
--- megahit.orig/src/megahit 2019-10-15 06:06:49.207438632 +0100
+++ megahit/src/megahit 2019-10-15 06:06:49.203438593 +0100
@@ -566,14 +566,9 @@
--- megahit-1.2.9.orig/src/megahit 2019-10-22 13:15:53.688356856 +0100
+++ megahit-1.2.9/src/megahit 2019-10-22 13:15:53.680356887 +0100
@@ -570,14 +570,9 @@
def find_test_data_path():
......
......@@ -16,6 +16,5 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len,
uint32_t IterateLocalLowDepth(UnitigGraph &graph, double min_depth,
uint32_t min_len, uint32_t local_width,
double local_ratio, bool permanent_rm = false);
uint32_t RemoveLowDepth(UnitigGraph &graph, double min_depth);
#endif // MEGAHIT_LOW_DEPTH_REMOVER_H
......@@ -312,7 +312,6 @@ void UnitigGraph::Refresh(bool set_changed) {
while (true) {
next_adapter = NextSimplePathAdapter(next_adapter);
assert(next_adapter.IsValid());
assert(!(next_adapter.GetFlag() & kDeleted));
if (next_adapter.b() == adapter.b()) {
break;
}
......
......@@ -25,7 +25,7 @@
#include <stdint.h>
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "v1.2.8"
#define PACKAGE_VERSION "v1.2.9"
#endif
#include "sdbg/sdbg_def.h"
......
......@@ -224,9 +224,11 @@ void MapToContigs(const HashMapper &mapper,
void AssembleAndOutput(const HashMapper &mapper, const SeqPackage &read_pkg,
MappingResultCollector &result_collector,
const std::string &output_file, int32_t local_range,
const std::string &output_file,
const int32_t local_range,
const LocalAsmOption &opt) {
size_t min_num_reads = local_range / read_pkg.max_length();
const size_t min_num_reads = read_pkg.max_length() > 0 ?
local_range / read_pkg.max_length() : 1;
xinfo("Minimum number of reads to do local assembly: {}\n", min_num_reads);
Sequence seq, contig_end;
......
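The `AssembleAndOutput` change above guards the `min_num_reads` computation against a zero maximum read length (an empty read set, as exercised by the new `test-empty` target in `simple_test`). A minimal standalone sketch of the same guard, using made-up values rather than the real `SeqPackage`:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const int32_t local_range = 1000;  // assumed local-assembly window size
  const size_t max_read_length = 0;  // empty read set: the problematic case
  // Without the check, local_range / max_read_length would divide by zero.
  const size_t min_num_reads =
      max_read_length > 0 ? local_range / max_read_length : 1;
  std::printf("min reads for local assembly: %zu\n", min_num_reads);
  return 0;
}
```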
......@@ -195,6 +195,7 @@ class Options:
self.pe12 = []
self.se = []
self.presets = ''
self.verbose = False
@property
def log_file_name(self):
......@@ -321,6 +322,7 @@ def parse_option(argv):
'mem-flag=',
'continue',
'version',
'verbose',
'out-prefix=',
'presets=',
'test',
......@@ -398,6 +400,8 @@ def parse_option(argv):
elif option in ('-v', '--version'):
print(software_info.megahit_version)
exit(0)
elif option == '--verbose':
opt.verbose = True
elif option == '--continue':
opt.continue_mode = True
elif option == '--out-prefix':
......@@ -591,11 +595,19 @@ def check_reads():
def detect_available_mem():
try:
psize = os.sysconf('SC_PAGE_SIZE')
pcount = os.sysconf('SC_PHYS_PAGES')
if psize < 0 or pcount < 0:
raise SystemError
return psize * pcount
except ValueError:
if sys.platform.find("darwin") != -1:
return int(float(os.popen("sysctl hw.memsize").readlines()[0].split()[1]))
elif sys.platform.find("linux") != -1:
return int(float(os.popen("free").readlines()[1].split()[1]) * 1024)
else:
raise
def cpu_dispatch():
......@@ -926,6 +938,8 @@ def merge_final(final_k):
def run_sub_command(cmd, msg, verbose=False):
if opt.verbose:
verbose = True
logger.info(msg)
logger.debug('command %s' % ' '.join(cmd))
......
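For reference, the memory-detection fallback added to the Python driver above (try `sysconf` first, then `sysctl hw.memsize` on macOS or `free` on Linux) can be sketched at the C level roughly as follows. This is an illustrative, hypothetical snippet, not code from MEGAHIT:

```cpp
#include <cstdint>
#include <cstdio>
#include <unistd.h>
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

// Returns total physical memory in bytes, or -1 if it cannot be determined.
static int64_t DetectAvailableMem() {
  const long psize = sysconf(_SC_PAGE_SIZE);
  const long pcount = sysconf(_SC_PHYS_PAGES);
  if (psize > 0 && pcount > 0) {
    return static_cast<int64_t>(psize) * pcount;  // the common, portable path
  }
#ifdef __APPLE__
  // Some macOS setups do not report _SC_PHYS_PAGES; ask sysctl instead,
  // which is what the Python fallback shells out to.
  int64_t memsize = 0;
  size_t len = sizeof(memsize);
  if (sysctlbyname("hw.memsize", &memsize, &len, nullptr, 0) == 0) {
    return memsize;
  }
#endif
  return -1;
}

int main() {
  std::printf("detected memory: %lld bytes\n",
              static_cast<long long>(DetectAvailableMem()));
  return 0;
}
```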
......@@ -12,7 +12,8 @@
class BinaryReader : public BaseSequenceReader {
public:
explicit BinaryReader(const std::string &filename) : is_(filename) {
explicit BinaryReader(const std::string &filename)
: is_(filename), buf_(120) {
if (is_.bad()) {
throw std::invalid_argument("Failed to open file " + filename);
}
......@@ -33,14 +34,14 @@ class BinaryReader : public BaseSequenceReader {
if (buf_.size() < num_words) {
buf_.resize(num_words);
}
auto bytes_read = reader_.read(&buf_[0], num_words);
auto bytes_read = reader_.read(buf_.data(), num_words);
assert(bytes_read == num_words * sizeof(buf_[0]));
(void)(bytes_read);
if (!reverse) {
pkg->AppendCompactSequence(&buf_[0], read_len);
pkg->AppendCompactSequence(buf_.data(), read_len);
} else {
pkg->AppendReversedCompactSequence(&buf_[0], read_len);
pkg->AppendReversedCompactSequence(buf_.data(), read_len);
}
num_bases += read_len;
......
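The `BinaryReader` tweak above pre-sizes the buffer and reads through `std::vector::data()`. Presumably the point is that `&buf_[0]` is undefined behaviour on an empty vector, while `data()` is always safe to call; a tiny illustration with a hypothetical reader, not the real `BufferedReader`:

```cpp
#include <cstdio>
#include <vector>

// Stand-in for a buffered word reader; with count == 0 it must not touch dst.
static size_t ReadWords(unsigned long *dst, size_t count) {
  for (size_t i = 0; i < count; ++i) dst[i] = i;  // pretend to fill the buffer
  return count;
}

int main() {
  std::vector<unsigned long> buf;        // empty: &buf[0] would be UB here
  size_t n = ReadWords(buf.data(), 0);   // data() is well-defined even when empty
  buf.resize(120);                       // mirrors the buf_(120) pre-size
  n += ReadWords(buf.data(), buf.size());
  std::printf("words read: %zu\n", n);
  return 0;
}
```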
......@@ -22,7 +22,10 @@ class Kmer {
using word_type = TWord;
static const unsigned kNumWords = NWords;
Kmer() { std::memset(data_, 0, sizeof(data_)); }
Kmer() {
static_assert(sizeof(*this) == sizeof(TWord) * NWords, "");
std::memset(data_, 0, sizeof(data_));
}
Kmer(const Kmer &kmer) { std::memcpy(data_, kmer.data_, sizeof(data_)); }
......@@ -214,7 +217,7 @@ class Kmer {
private:
word_type data_[kNumWords];
} __attribute__((packed));
};
namespace std {
template <const unsigned NumWords, typename T>
......
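Dropping `__attribute__((packed))` from `Kmer` while adding the constructor `static_assert` keeps the layout guarantee (exactly `NWords` words, no padding) without binding references into a packed object, which is what the Debian `kmer.h` patch earlier in this set works around. A cut-down, hypothetical version of the pattern:

```cpp
#include <cstdint>
#include <cstring>

// Hypothetical miniature Kmer: the static_assert enforces at compile time
// that the word array is the entire object, replacing the layout guarantee
// previously implied by __attribute__((packed)).
template <unsigned NWords, typename TWord = uint64_t>
class TinyKmer {
 public:
  TinyKmer() {
    static_assert(sizeof(*this) == sizeof(TWord) * NWords,
                  "TinyKmer must contain no padding");
    std::memset(data_, 0, sizeof(data_));
  }
  TWord word(unsigned i) const { return data_[i]; }

 private:
  TWord data_[NWords];
};

int main() {
  TinyKmer<2> k;  // 2 x 64-bit words: sizeof(k) == 16
  return k.word(0) == 0 ? 0 : 1;
}
```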
......@@ -259,6 +259,12 @@ class SequencePackage {
}
void AppendStringSequence(const char *from, const char *to, unsigned len) {
if (len == 0) {
// Fake a sequence whose length is 1, as we need all sequences' length > 0
// to make `GetSeqID` work
auto fake_sequence = "A";
return AppendStringSequence(fake_sequence, fake_sequence + 1, 1);
}
UpdateLength(len);
std::ptrdiff_t step = from < to ? 1 : -1;
for (auto ptr = from; ptr != to; ptr += step) {
......@@ -267,7 +273,14 @@ class SequencePackage {
}
void AppendCompactSequence(const TWord *ptr, unsigned len, bool rev) {
if (len == 0) {
// Fake a sequence whose length is 1, as we need all sequences' length > 0
// to make `GetSeqID` work
TWord fake_sequence = 0;
return AppendCompactSequence(&fake_sequence, 1, false);
}
UpdateLength(len);
if (rev) {
auto rptr = ptr + DivCeiling(len, kBasesPerWord) - 1;
unsigned bases_in_last_word = len % kBasesPerWord;
......
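Both `SequencePackage` overloads above now pad a zero-length input with a single placeholder base so that every stored sequence has length >= 1, which the comments say `GetSeqID` depends on (and which matches the 1.2.9 changelog entry about length-zero sequences). A rough, self-contained sketch of why a length-0 entry breaks position-to-sequence lookups; the names here are invented, not the real `SequencePackage`:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical container that records cumulative end positions; a length-0
// entry would create two identical end positions and confuse lookups that
// map a base position back to a sequence id (the GetSeqID concern above).
class TinyPackage {
 public:
  void Append(std::string seq) {
    if (seq.empty()) seq = "A";            // pad, mirroring the diff's fix
    total_ += seq.size();
    ends_.push_back(total_);
    seqs_.push_back(std::move(seq));
  }
  // Assumes base_index < total number of stored bases.
  size_t GetSeqID(size_t base_index) const {
    size_t id = 0;
    while (ends_[id] <= base_index) ++id;  // strictly increasing thanks to padding
    return id;
  }

 private:
  std::vector<std::string> seqs_;
  std::vector<size_t> ends_;
  size_t total_ = 0;
};

int main() {
  TinyPackage pkg;
  pkg.Append("ACGT");
  pkg.Append("");   // would previously duplicate an end position
  pkg.Append("GG");
  std::printf("base 4 belongs to sequence %zu\n", pkg.GetSeqID(4));
  return 0;
}
```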
......@@ -218,7 +218,8 @@ void BaseSequenceSortingEngine::Lv0PrepareThreadPartition() {
int64_t average = meta_.num_sequences / n_threads_;
meta.seq_from = t * average;
meta.seq_to = t < n_threads_ - 1 ? (t + 1) * average : meta_.num_sequences;
meta.offset_base = Lv0EncodeDiffBase(meta.seq_from);
meta.offset_base = meta.seq_from < meta_.num_sequences ?
Lv0EncodeDiffBase(meta.seq_from) : std::numeric_limits<int64_t>::max();
}
for (unsigned i = 0; i < kNumBuckets; ++i) {
......
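Finally, the sorting-engine hunk above gives a thread whose partition starts past the last sequence a sentinel `offset_base` instead of calling `Lv0EncodeDiffBase` on a nonexistent sequence; with this partitioning scheme that situation arises when the input has zero sequences (the `empty.fa` case added to `simple_test`). A small hypothetical sketch of the partitioning with that guard:

```cpp
#include <cstdint>
#include <cstdio>
#include <limits>

// Hypothetical stand-in for Lv0EncodeDiffBase(); assume it must only be
// called with the index of a sequence that actually exists.
static int64_t OffsetBase(int64_t seq_id) { return seq_id * 100; }

int main() {
  const int64_t num_sequences = 0;  // empty input, e.g. the empty.fa test case
  const unsigned n_threads = 4;
  const int64_t average = num_sequences / n_threads;

  for (unsigned t = 0; t < n_threads; ++t) {
    const int64_t seq_from = t * average;
    const int64_t seq_to =
        t < n_threads - 1 ? (t + 1) * average : num_sequences;
    // With zero sequences every range is empty; the sentinel avoids asking
    // for the offset of a sequence that does not exist.
    const int64_t offset_base = seq_from < num_sequences
                                    ? OffsetBase(seq_from)
                                    : std::numeric_limits<int64_t>::max();
    std::printf("thread %u: [%lld, %lld) offset_base=%lld\n", t,
                static_cast<long long>(seq_from),
                static_cast<long long>(seq_to),
                static_cast<long long>(offset_base));
  }
  return 0;
}
```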