[submodule "util/regression"]
path = util/regression
url = https://github.com/soedinglab/MMseqs2-Regression.git
......@@ -24,6 +24,7 @@ matrix:
- zlib1g-dev
- libbz2-dev
- vim-common
- shellcheck
env: CC=clang-3.6 CXX=clang++-3.6
- os: linux
dist: trusty
......@@ -41,6 +42,7 @@ matrix:
- zlib1g-dev
- libbz2-dev
- vim-common
- shellcheck
env: CC=clang-7 CXX=clang++-7
- os: linux
dist: trusty
......@@ -54,6 +56,7 @@ matrix:
- zlib1g-dev
- libbz2-dev
- vim-common
- shellcheck
env: CC=gcc-4.8 CXX=g++-4.8
- os: linux
dist: trusty
......@@ -69,6 +72,7 @@ matrix:
- zlib1g-dev
- libbz2-dev
- vim-common
- shellcheck
env: CC=gcc-8 CXX=g++-8
- os: linux
dist: trusty
......@@ -85,6 +89,7 @@ matrix:
- libbz2-dev
- vim-common
- libopenmpi-dev
- shellcheck
env: MPI=1 CC=gcc-8 CXX=g++-8
- os: osx
osx_image: xcode10.1
......@@ -96,6 +101,7 @@ matrix:
- gcc@8
- zlib
- bzip2
- shellcheck
env: CC=gcc-8 CXX=g++-8
allow_failures:
- env: QEMU_ARM=1
......@@ -116,7 +122,7 @@ script:
elif [[ "$TRAVIS_OS_NAME" == "linux" ]]; then \
if [[ -n "$MPI" ]]; then MPI=1; else MPI=0; fi; \
mkdir build; cd build; \
cmake -G Ninja -DENABLE_WERROR=1 -DHAVE_MPI="$MPI" -DHAVE_SSE4_1=1 .. \
cmake -G Ninja -DENABLE_WERROR=1 -DHAVE_MPI="$MPI" -DHAVE_SSE4_1=1 -DHAVE_TESTS=1 -DREQUIRE_OPENMP=0 .. \
|| exit 1; ninja || exit 1; \
elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then \
./util/build_osx.sh . build || exit 1; \
......@@ -124,18 +130,3 @@ script:
exit 1; \
fi
after_success:
- |
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]] || [[ "$TRAVIS_OS_NAME" != "osx" ]]; then \
exit 0; \
fi; \
if [[ "$encrypted_4188a201d0b5_key" == "" ]] || [[ "$encrypted_4188a201d0b5_iv" == "" ]]; then \
exit 0; \
fi; \
openssl aes-256-cbc -K "$encrypted_4188a201d0b5_key" -iv "$encrypted_4188a201d0b5_iv" -in ./util/.travis.enc -out "$HOME/.ssh/id_rsa" -d; \
chmod 400 "$HOME/.ssh/id_rsa"; \
ssh -o StrictHostKeyChecking=no codeship@uniclust.mmseqs.com \
"mkdir -p /home/mirdita/repositories/mmseqs-webserver/archive/${TRAVIS_COMMIT}"; \
cd build; \
scp -o StrictHostKeyChecking=no mmseqs-osx-static_sse41.tar.gz mmseqs-osx-static_avx2.tar.gz \
codeship@uniclust.mmseqs.com:/home/mirdita/repositories/mmseqs-webserver/archive/${TRAVIS_COMMIT};
......@@ -6,6 +6,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(FRAMEWORK_ONLY 0 CACHE BOOL "Framework Mode")
set(HAVE_SANITIZER 0 CACHE BOOL "Have Sanitizers")
set(INSTALL_UTIL 1 CACHE BOOL "Install util scripts")
set(VERSION_OVERRIDE "" CACHE STRING "Override version string in help and usage messages")
#Sanitizers
......@@ -21,7 +22,7 @@ if (NOT CMAKE_BUILD_TYPE)
endif (NOT CMAKE_BUILD_TYPE)
# find compiler
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
# using Clang
message("-- Compiler is clang(++)")
set(CMAKE_COMPILER_IS_CLANG 1)
......@@ -58,26 +59,31 @@ if (APPLE)
set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -D__APPLE_API_STRICT_CONFORMANCE")
endif ()
if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -D_WITH_GETLINE")
endif()
# zstd
SET(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd")
SET(CMAKE_INSTALL_LIBDIR bin)
# We use ZSTD_findDecompressedSize which is only available with ZSTD_STATIC_LINKING_ONLY
# Thus we cannot use a system provided libzstd
set(ZSTD_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd")
set(CMAKE_INSTALL_LIBDIR bin)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/lib/zstd/build/cmake/CMakeModules")
OPTION(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" OFF)
OPTION(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
OPTION(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF)
OPTION(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" OFF)
OPTION(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" OFF)
OPTION(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
OPTION(ZSTD_BUILD_TESTS "BUILD TESTS" OFF)
option(ZSTD_LEGACY_SUPPORT "LEGACY SUPPORT" OFF)
option(ZSTD_BUILD_STATIC "BUILD STATIC LIBRARIES" ON)
option(ZSTD_BUILD_SHARED "BUILD SHARED LIBRARIES" OFF)
option(ZSTD_MULTITHREAD_SUPPORT "MULTITHREADING SUPPORT" OFF)
option(ZSTD_BUILD_PROGRAMS "BUILD PROGRAMS" OFF)
option(ZSTD_BUILD_CONTRIB "BUILD CONTRIB" OFF)
option(ZSTD_BUILD_TESTS "BUILD TESTS" OFF)
include_directories(lib/zstd/lib)
add_subdirectory(lib/zstd/build/cmake/lib EXCLUDE_FROM_ALL)
# tinyexpr
OPTION(TE_NAT_LOG "Define the log function as natural logarithm." ON)
include_directories(lib/tinyexpr)
add_subdirectory(lib/tinyexpr EXCLUDE_FROM_ALL)
include_directories(lib)
include_directories(lib/kseq)
include_directories(lib/simd)
include_directories(lib/gzstream)
include_directories(lib/alp)
......@@ -88,6 +94,6 @@ add_subdirectory(lib/alp)
add_subdirectory(lib/ksw2)
add_subdirectory(data)
add_subdirectory(src)
if (NOT FRAMEWORK_ONLY)
if (NOT FRAMEWORK_ONLY AND INSTALL_UTIL)
add_subdirectory(util)
endif ()
# MMseqs2: ultra fast and sensitive protein search and clustering suite
MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge proteins/nucleotide sequence sets. MMseqs2 is open source GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta version, via cygwin) Windows. The software is designed to run on multiple cores and servers and exhibits very good scalability. MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity. It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed.
MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets. MMseqs2 is open source GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta version, via cygwin) Windows. The software is designed to run on multiple cores and servers and exhibits very good scalability. MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity. It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed.
## Publications
......@@ -7,42 +7,36 @@ MMseqs2 (Many-against-Many sequence searching) is a software suite to search and
[Steinegger M and Soeding J. Clustering huge protein sequence sets in linear time. Nature Communications, doi: 10.1038/s41467-018-04964-5 (2018)](https://www.nature.com/articles/s41467-018-04964-5).
[Mirdita M, Steinegger M and Soeding J. MMseqs2 desktop and local web server app for fast, interactive sequence searches. Bioinformatics, doi: 10.1093/bioinformatics/bty1057 (2019)](https://academic.oup.com/bioinformatics/article/35/16/2856/5280135)
[![BioConda Install](https://img.shields.io/conda/dn/bioconda/mmseqs2.svg?style=flag&label=BioConda%20install)](https://anaconda.org/bioconda/mmseqs2)
[![Github All Releases](https://img.shields.io/github/downloads/soedinglab/mmseqs2/total.svg)](https://github.com/soedinglab/mmseqs2/releases/latest)
[![Docker Pulls](https://img.shields.io/docker/pulls/soedinglab/mmseqs2.svg)](https://hub.docker.com/r/soedinglab/mmseqs2)
[![Build Status](https://dev.azure.com/themartinsteinegger/mmseqs2/_apis/build/status/soedinglab.MMseqs2?branchName=master)](https://dev.azure.com/themartinsteinegger/mmseqs2/_build/latest?definitionId=2&branchName=master)
![AppVeyor CI](https://ci.appveyor.com/api/projects/status/lq8nxeb0j8v38d1a?svg=true)
[![Travis CI](https://travis-ci.org/soedinglab/MMseqs2.svg?branch=master)](https://travis-ci.org/soedinglab/MMseqs2)
[![Zenodo DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.840208.svg)](https://zenodo.org/record/1718312)
<p align="center"><img src="https://raw.githubusercontent.com/soedinglab/mmseqs2/master/.github/mmseqs2_logo.png" height="256" /></p>
## Documentation
## Documentation
The MMseqs2 user guide is available in our [GitHub Wiki](https://github.com/soedinglab/mmseqs2/wiki) or as a [PDF file](https://mmseqs.com/latest/userguide.pdf) (Thanks to [pandoc](https://github.com/jgm/pandoc)!). We provide a tutorial of MMseqs2 [here](https://github.com/soedinglab/metaG-ECCB18-partII).
## News
Keep posted about MMseqs2/Linclust updates by following Martin on [Twitter](https://twitter.com/thesteinegger).
08/10/2018 ECCB18 tutorial of MMseqs2 is available [here](https://github.com/soedinglab/metaG-ECCB18-partII).
07/07/2018 Linclust has just been published at [Nature Communications](https://www.nature.com/articles/s41467-018-04964-5).
17/10/2017 MMseqs2 has just been published at [Nature Biotechnology](https://www.nature.com/nbt/journal/vaop/ncurrent/full/nbt.3988.html).
## Installation
MMseqs2 can be used by compiling from source, downloading a statically compiled version, using [Homebrew](https://github.com/Homebrew/brew), [conda](https://github.com/conda/conda) or [Docker](https://github.com/moby/moby). MMseqs2 requires a 64-bit system (check with `uname -a | grep x86_64`) with at least the SSE4.1 instruction set (check by executing `cat /proc/cpuinfo | grep sse4_1` on Linux or `sysctl -a | grep machdep.cpu.features | grep SSE4.1` on MacOS).
# install by brew
brew install mmseqs2
# install via conda
conda install -c bioconda mmseqs2
conda install -c bioconda mmseqs2
# install docker
docker pull soedinglab/mmseqs2
# static build sse4.1
wget https://mmseqs.com/latest/mmseqs-static_sse41.tar.gz; tar xvfz mmseqs-static_sse41.tar.gz; export PATH=$(pwd)/mmseqs2/bin/:$PATH
# static build AVX2
wget https://mmseqs.com/latest/mmseqs-static_avx2.tar.gz; tar xvfz mmseqs-static_avx2.tar.gz; export PATH=$(pwd)/mmseqs2/bin/:$PATH
# static build with SSE4.1
wget https://mmseqs.com/latest/mmseqs-linux-sse41.tar.gz; tar xvfz mmseqs-linux-sse41.tar.gz; export PATH=$(pwd)/mmseqs/bin/:$PATH
# static build with AVX2
wget https://mmseqs.com/latest/mmseqs-linux-avx2.tar.gz; tar xvfz mmseqs-linux-avx2.tar.gz; export PATH=$(pwd)/mmseqs/bin/:$PATH
The AVX2 version is faster than the SSE4.1 version. Check if AVX2 is supported by executing `cat /proc/cpuinfo | grep avx2` on Linux or `sysctl -a | grep machdep.cpu.leaf7_features | grep AVX2` on MacOS.
We also provide static binaries for MacOS and Windows at [mmseqs.com/latest](https://mmseqs.com/latest).
......@@ -55,73 +49,72 @@ MMseqs2 comes with a bash command and parameter auto completion, which can be ac
fi
</pre>
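A minimal sketch of what such an activation snippet in `~/.bashrc` could look like (the install path is a placeholder; adjust it to wherever `util/bash-completion.sh` was installed):
<pre>
if [ -f /path/to/mmseqs/util/bash-completion.sh ]; then
    . /path/to/mmseqs/util/bash-completion.sh
fi
</pre>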
### Compile from source
Compiling MMseqs2 from source has the advantage that it will be optimized to the specific system, which should improve its performance. To compile MMseqs2 `git`, `g++` (4.6 or higher) and `cmake` (3.0 or higher) are needed. Afterwards, the MMseqs2 binary will be located in the `build/bin/` directory.
### Compilation from source
Compiling MMseqs2 from source has the advantage that it will be optimized to the specific system, which should improve its performance. To compile MMseqs2, `git`, `g++` (4.8 or later) and `cmake` (2.8.12 or later) are needed. Afterwards, the MMseqs2 binary will be located in the `build/bin/` directory.
git clone https://github.com/soedinglab/MMseqs2.git
cd MMseqs2
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=. ..
make
make install
make -j 4
make install
export PATH=$(pwd)/bin/:$PATH
:exclamation: To compile MMseqs2 on MacOS, first install the `gcc` compiler from Homebrew. The default MacOS `clang` compiler does not support OpenMP and MMseqs2 will only be able to use a single thread. Then use the following cmake call:
:exclamation: To compile MMseqs2 on MacOS, first install the `gcc` compiler from Homebrew. The default MacOS `clang` compiler does not support OpenMP and MMseqs2 will only be able to use a single thread. Then use the following `cmake` call:
CXX="$(brew --prefix)/bin/g++-8" cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=. ..
CC="$(brew --prefix)/bin/gcc-9" CXX="$(brew --prefix)/bin/g++-9" cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=. ..
## Easy workflows
We provide `easy` workflows to search and cluster. The `easy-search` workflow searches directly with a FASTA/FASTQ file against either another FASTA/FASTQ file or an already existing MMseqs2 target database.
mmseqs createdb examples/DB.fasta targetDB
mmseqs easy-search examples/QUERY.fasta targetDB alnRes tmp
## Getting started
We provide `easy` workflows to cluster, search and assign taxonomy. These `easy` workflows are a shorthand to deal directly with FASTA/FASTQ files as input and output. MMseqs2 provides many modules to transform, filter, execute external programs and search. However, these modules use the MMseqs2 database formats, instead of the FASTA/FASTQ format. For maximum flexibility, we recommend using MMseqs2 workflows and modules directly. Please read more about this in the [documentation](https://github.com/soedinglab/mmseqs2/wiki).
### Cluster
For clustering, MMseqs2 `easy-cluster` and `easy-linclust` are available.
`easy-cluster` by default clusters the entries of a FASTA/FASTQ file using a cascaded clustering algorithm.
mmseqs easy-cluster examples/DB.fasta clusterRes tmp
mmseqs easy-cluster examples/DB.fasta clusterRes tmp --min-seq-id 0.5 -c 0.8 --cov-mode 1
`easy-linclust` clusters the entries of a FASTA/FASTQ file. The runtime scales linearly with input size. This mode is recommended for huge datasets.
mmseqs easy-linclust examples/DB.fasta clusterRes tmp
These `easy` workflows are a shorthand to deal directly with FASTA/FASTQ files as input and output. MMseqs2 provides many modules to transform, filter, execute external programs and search. However, these modules use the MMseqs2 database formats, instead of the FASTA/FASTQ format. For optimal efficiency, we recommend using MMseqs2 workflows and modules directly.
## How to search
You can use the query database "QUERY.fasta" and target database "DB.fasta" in the examples folder to test the search workflow. First, you need to convert the FASTA files into the MMseqs2 database format.
Sequence identity is by default [estimated](https://github.com/soedinglab/MMseqs2/wiki#how-does-mmseqs2-compute-the-sequence-identity); to output the real sequence identity, use `--alignment-mode 3`.
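For example, to report the real sequence identity for the clustering shown above (a minimal sketch; `--alignment-mode 3` is the only addition to the earlier call):
mmseqs easy-cluster examples/DB.fasta clusterRes tmp --alignment-mode 3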
Read more about the [clustering format](https://github.com/soedinglab/mmseqs2/wiki#clustering-format) in our user guide.
Please adjust the [clustering criteria](https://github.com/soedinglab/MMseqs2/wiki#clustering-criteria) and check whether the temporary directory provides enough free space. For disk space requirements, see the user guide.
mmseqs createdb examples/QUERY.fasta queryDB
mmseqs createdb examples/DB.fasta targetDB
### Search
The `easy-search` workflow searches directly with a FASTA/FASTQ file against either another FASTA/FASTQ file or an already existing MMseqs2 database.
If the target database will be used several times, we recommend precomputing an index of `targetDB`, as this saves overhead computations. The index should be created on a computer that has at least the same amount of memory as the computer that performs the search.
mmseqs easy-search examples/QUERY.fasta DB.fasta alnRes tmp
It is also possible to pre-compute the index for the target database:
mmseqs createdb examples/DB.fasta targetDB
mmseqs createindex targetDB tmp
mmseqs easy-search examples/QUERY.fasta targetDB alnRes tmp
The speed and sensitivity of the `search` can be adjusted with the `-s` parameter and should be adapted based on your use case (see [setting sensitivity -s parameter](https://github.com/soedinglab/mmseqs2/wiki#set-sensitivity--s-parameter)). A very fast search would use a sensitivity of `-s 1.0`, while a very sensitive search would use a sensitivity of up to `-s 7.0`. A detailed guide on how to speed up searches is available [here](https://github.com/soedinglab/MMseqs2/wiki#how-to-control-the-speed-of-the-search).
MMseqs2 stores intermediate results in `tmp`. Using a fast local drive can reduce load on a shared filesystem and increase speed.
To run the search, execute:
mmseqs search queryDB targetDB resultDB tmp
The sensitivity of the `search` can be adjusted with the `-s` parameter and should be adapted based on your use case (see [setting sensitivity -s parameter](https://github.com/soedinglab/mmseqs2/wiki#set-sensitivity--s-parameter)).
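For example, using the query and target databases created above (the sensitivity values are the extremes mentioned above):
mmseqs search queryDB targetDB resultDB tmp -s 1.0   # very fast, less sensitive
mmseqs search queryDB targetDB resultDB tmp -s 7.0   # very sensitive, slower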
If you require the exact alignment information (sequence identity, alignment string, ...) in later steps, add the option `-a`; without this parameter, MMseqs2 automatically decides whether it needs to compute the exact alignment, to save computation time.
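For example, to keep the full alignment information for later conversion with `convertalis`:
mmseqs search queryDB targetDB resultDB tmp -a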
Please ensure that, in case of large input databases, the `tmp` directory provides enough free space.
Our user guide provides more information about [disk space requirements](https://github.com/soedinglab/mmseqs2/wiki#prefiltering-module).
The output can be customized with the `--format-output` option, e.g. `--format-output "query,target,qaln,taln"` returns the query and target accession and the pairwise alignments in tab-separated format. You can choose many different [output columns](https://github.com/soedinglab/mmseqs2/wiki#custom-alignment-format-with-convertalis).
Then convert the result database into a BLAST-tab formatted database (format: qId, tId, seqIdentity, alnLen, mismatchCnt, gapOpenCnt, qStart, qEnd, tStart, tEnd, eVal, bitScore).
### Taxonomy
The `easy-taxonomy` workflow can be used to assign taxonomic labels to sequences. It performs a search against a target sequence database and computes the lowest common ancestor of all equal-scoring top hits (default). Other assignment options are available through `--lca-mode`.
mmseqs convertalis queryDB targetDB resultDB resultDB.m8
mmseqs createdb examples/DB.fasta targetDB
mmseqs createtaxdb targetDB tmp
mmseqs createindex targetDB tmp
mmseqs easy-taxonomy examples/QUERY.fasta targetDB alnRes tmp
The output can be customized with the `--format-output` option, e.g. `--format-output "query,target,qaln,taln"` returns the query and target accession and the pairwise alignments in tab-separated format. You can choose many different [output columns](https://github.com/soedinglab/mmseqs2/wiki#custom-alignment-format-with-convertalis) in the `convertalis` module. Make sure that you used the option `-a` during the search (`mmseqs search ... -a`).
By default, `createtaxdb` assigns every sequence with a UniProt accession a taxonomic identifier and downloads the NCBI taxonomy. We also support [BLAST](https://github.com/soedinglab/MMseqs2/wiki#create-a-sequence-database-with-taxonomic-information-from-an-existing-blast-database), [SILVA](https://github.com/soedinglab/MMseqs2/wiki#create-a-sequence-database-with-taxonomic-information-for-silva) or [custom taxonomic](https://github.com/soedinglab/MMseqs2/wiki#manually-annotate-a-sequence-database-with-taxonomic-information) databases.
mmseqs convertalis queryDB targetDB resultDB resultDB.pair --format-output "query,target,qaln,taln"
Read more about the [taxonomy format](https://github.com/soedinglab/MMseqs2/wiki#taxonomy-format) and the [classification](https://github.com/soedinglab/MMseqs2/wiki#taxonomy-assignment-using-mmseqs-taxonomy) in our user guide.
### Other search modes
### Supported search modes
MMseqs2 provides many additional search modes:
* Iterative sequence-profile searches (like PSI-BLAST) with the `--num-iterations` parameter (see the sketch after this list)
......@@ -129,58 +122,25 @@ MMseqs2 provides many additional search modes:
* [Iterative increasing sensitivity searches](https://github.com/soedinglab/MMseqs2/wiki#how-to-find-the-best-hit-the-fastest-way) to find only the best hits faster
* [Taxonomic assignment](https://github.com/soedinglab/MMseqs2/wiki#taxonomy-assignment-using-mmseqs-taxonomy) using 2bLCA or LCA
* Fast ungapped alignment searches to find [very similar sequence matches](https://github.com/soedinglab/MMseqs2/wiki#mapping-very-similar-sequences-using-mmseqs-map)
* Very fast and sensitive Searches against [profile databases such as the PFAM](https://github.com/soedinglab/MMseqs2/wiki#how-to-create-a-target-profile-database-from-pfam)
* Very fast and sensitive searches against [profile databases such as the PFAM](https://github.com/soedinglab/MMseqs2/wiki#how-to-create-a-target-profile-database-from-pfam)
* [Reciprocal best hits search](https://github.com/soedinglab/MMseqs2/wiki#reciprocal-best-hit-using-mmseqs-rbh)
* [Web search API and user interface](https://github.com/soedinglab/MMseqs2-App)
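A minimal sketch of the iterative profile search mentioned above (the number of iterations is illustrative; the databases are the ones created in the search tutorial):
mmseqs search queryDB targetDB resultDB tmp --num-iterations 3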
Many modes can also be combined. You can, for example, do a translated nucleotide against protein profile search.
## How to cluster
Before clustering, convert your database into the MMseqs2 database format:
mmseqs createdb examples/DB.fasta DB
Then execute the clustering:
mmseqs cluster DB clu tmp
or linear-time clustering (faster but less sensitive):
mmseqs linclust DB clu tmp
Please adjust the [clustering criteria](https://github.com/soedinglab/MMseqs2/wiki#clustering-criteria) and check whether the temporary directory provides enough free space. For disk space requirements, see the user guide.
To generate a FASTA-style formatted output file from the ffindex output file, type:
mmseqs createseqfiledb DB clu clu_seq
mmseqs result2flat DB DB clu_seq clu_seq.fasta
To generate a TSV-style formatted output file from the ffindex output file, type:
mmseqs createtsv DB DB clu clu.tsv
To extract the representative sequences from the clustering result call:
mmseqs result2repseq DB clu DB_clu_rep
mmseqs result2flat DB DB DB_clu_rep DB_clu_rep.fasta --use-fasta-header
Read more about the format [here](https://github.com/soedinglab/mmseqs2/wiki#clustering-format).
### Memory Requirements
MMseqs2 checks the available memory of the computer and automatically divides the target database into parts that fit into memory. Splitting the database will increase the runtime slightly.
The memory consumption grows linearly with the number of residues in the database. The following formula can be used to estimate the index size.
M = (7 × N × L) byte + (8 × a^k) byte
Where `L` is the average sequence length, `N` is the number of sequences in the database, `a` is the alphabet size and `k` is the k-mer size.
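For example, for a database of N = 1,000,000 protein sequences with an average length of L = 350, an alphabet size of a = 21 and a k-mer size of k = 7 (the last two values are assumptions for illustration), this gives roughly:
M = (7 × 1,000,000 × 350) byte + (8 × 21^7) byte ≈ 2.5 GB + 14.4 GB ≈ 17 GB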
The minimum memory requirement of MMseqs2 for `cluster` or `linclust` is 1 byte per sequence residue; `search` needs 1 byte per target residue. Sequence databases can be compressed using the `--compress` flag; DNA sequences can be reduced in size by a factor of `~3.5` and proteins by `~1.7`.
MMseqs2 checks the available system memory and automatically divides the target database in parts that fit into memory. Splitting the database will increase the runtime slightly. It is possible to control the memory usage using `--split-memory-limit`.
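For example, to cap the amount of the target database held in memory at roughly 30 GB (the value is illustrative):
mmseqs search queryDB targetDB resultDB tmp --split-memory-limit 30G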
### How to run MMseqs2 on multiple servers using MPI
MMseqs2 can run on multiple cores and servers using OpenMP and Message Passing Interface (MPI).
MPI assigns database splits to each compute node, which are then computed with multiple cores (OpenMP).
Make sure that MMseqs2 was compiled with MPI by using the `-DHAVE_MPI=1` flag (`cmake -DHAVE_MPI=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..`). Our precompiled static version of MMseqs2 can not use MPI. The version string of MMseqs2 will have a `-MPI` suffix, if it was build successfully with MPI support.
Make sure that MMseqs2 was compiled with MPI by using the `-DHAVE_MPI=1` flag (`cmake -DHAVE_MPI=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=. ..`). Our precompiled static version of MMseqs2 cannot use MPI. The version string of MMseqs2 will have a `-MPI` suffix, if it was built successfully with MPI support.
To search with multiple servers, call the `search` or `cluster` workflow with the MPI command exported in the RUNNER environment variable. The databases and temporary folder have to be shared between all nodes (e.g. through NFS):
RUNNER="mpirun -pernode -np 42" mmseqs search queryDB targetDB resultDB tmp
To search with multiple servers call the `search` or `cluster` workflow with the MPI command exported in the RUNNER environment variable. The databases and temporary folder have to be shared between all nodes (e.g. through NFS):
RUNNER="mpirun -np 42" mmseqs search queryDB targetDB resultDB tmp
build: off
init:
- git config --global core.autocrlf input
environment:
CYG_MIRROR: http://cygwin.mirror.constant.com
CYG_PACKAGES: bash,xxd,cmake,make,gcc-core,gcc-g++,zlib-devel,libbz2-devel,busybox-standalone,git,binutils
CYG_ROOT: C:\cygwin64
CYG_CACHE: C:\cygwin64\var\cache\setup
CYG_SETUP: C:\cygwin64\setup-x86_64.exe
BASH: C:\cygwin64\bin\bash
PKEY:
secure: qcbqWes9F8y+mankxX5zRTMBN6Q6WD5x7CZKP2bIe+D1LrK/kDE9OSAs8515pR1+hI365q/3o0nANRtckJOfz77/SHEM1pr/Tvkdz0Z0J6VI+RaDeMDicNBTS9dFP60jXnZQM6ihcARxeS7KKGuO3SNf26GwzwkFcu3gdr9eAoBqHXMx0mj9Pa/Zee04/FuAeaeGUgjsjf/PtPtqbj4uf/v0mpYnZsq6h9UsACruue+i0mdkYKvwrJpEPFLnXM5JYmMx7rLobFXRsUW+zKwhLPURwHlPvzosod4Bs0Tnoi1ia/8Up+EmutriEmt3mRXbvca2sjzRTs+3Grdtw38kRalCY6A047QIp3scZgyhOOErGjQooI29pStOLquZnepYTO2EmUD0UXdNLveTuUed7uB6fl+IHfyJC4DF1FD//gOArYyz0BjZdvjfcNCqjXLj49nDrtr4DMiOsMbtNuvpYEATyF7hXPt2M62s+3ccj8/DwTscCnfjQGo/CeRvM9QqOBUzDgDypjr3yrXBcS82LO167gojdI1epY3dndxG0js6Uc2viPbbbLpnl0/4ZKnPoX9ZSWksLAWnY8v5DDWt1rDHirv+GLbTXs9fKlmduY0b1a49ntyA+L28q2qWUZz9WIMHJtoh8r3BDFg+Qqyk3m16MywlUpXOTTB4GyMlQys0R8DH8FYZL6cUttSRSiS94X8XFYDH/6yJC0P+TK9GgWH78Ca+I/6HkUo5oNb38i9Hw7zXu6cvvfTbNi1eTSSvXpvwjQBM9PU3NFrH3gqgAdGUi7kVoABc6mAWGl9vfpDbAHHBVBenmCQU84GcltgawE4W53Ayi86ImgRC0IPmmC2Zli+4zcIQGNqf1C4Y2/R8qWvVK84ehJe/HlY02h3UYBAk6R3Pr9bEm1V+Riivo8S9bawRp3i8Ljcr3AF9UIP2xY235MWvrQB7aCwxNiSrxVzesM7OlKb+NnEMZJCbdIuY7WIgKeWeXg9f1sn8yZ89TY+Q0oNWfmPFVcsQuagXpfQEP8EGJm5WiMpd0gukFkFDsJvuI9kbLll+Kwoz9YHH2eLaYhMp/ZXVNsvRG8Jb0D2TlggnInBsBUVpqsRJfEwGXHithCleXYj4i9TmSDPweGgrAqhdJYLbZWnL94ndxhWAE2sN5IYEJBavAOG5FDf9z/zmBXhHpJKnnqLT0ahnpf7P7+fKduEhh1iNXV/oweuE3RDvddLDmP0kfSSg2uIfiarEuQx8lghDKkgl37Bow+KMYFxWB188G3Z8JjQpxLOV0fZ8eENx1zpF1HplhvxtE9dhbJ8wGzpCS1Mp9RFzkTUMdZue/uJgNGjQ8/nPq1zBztxQPX0q4+AsOjBBTH0c2Cnaj7DTL7WteOTSBimbdvh3yNKj98e3Idnum0mwTIzUqMYIBubNl62kL9vG6NR2xMcuqRXnRHcvCKo38CoM3iuL3e0KoTwozTnzVdKFmiRn4W8GKzFHDpKZLMU+/zzsuy93+27jB0T585EAfZdO5oxu4Ki5LuDR9R5t40ZOSBMuA/ccVdMs0pmidhuQrbtQoPC2JNboZEYrHaYXEk132bQZevn08quq3gxoV95mkg4SggDdET/4Sj3vhOaHWKg+ZE/49OzC12ZF0kwRkwB7yayOSVfYxF2tTvhFqGXfo9Zbc0CPqZw4m3qgogcgtTwtwIAIhjXU2ur22+l/S74MjaiK9mBzGksNFCGmgzJgEx31iZqlNFQCbPUlfUBW57RVqsSwOCaeTxAHJ2Jozb9lmgKzPcFCz/NvFCIfaRmGJRFF7kbRek2Fpiwtt80ZXIrumRNOo+/fQMZ7THSvK64DJLWafqYjVlt991VKh9gw1uK68MSuPoLpwTuiTg1QAPPV9xqA246W5+OlgWuTVZGGmVyuUYkF1r2CLkdNY8NjmJ/yJm35XsX8rdU0pjK6LTfHSuAE1XcJtB+0vR2IHsUNoa7Dxs+E2SMMXhJfbLD9MOOOzY86PVc9/iboJuBI02485LBLamH4J9EOmEFxSLMYHVwMvyrurDA27x3ni1HHV7KZl/hpFjm6bxtEMvY4IglOoa6T28qED5DM4ACI+AtivGMPJ8BTGZaKQzgNVghvCIkH/fKX2qTCpApRK6+qI48gvKutSmB5mDFeOv8BQgU=
cache:
- C:\cygwin64\var\cache\setup
install:
- if defined CYG_ROOT (%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages "%CYG_PACKAGES%" --upgrade-also)
build_script:
- if defined BASH (%BASH% -lc "${APPVEYOR_BUILD_FOLDER}/util/build_windows.sh $(cygpath ${APPVEYOR_BUILD_FOLDER}) $(cygpath ${APPVEYOR_BUILD_FOLDER})/build")
- cd %APPVEYOR_BUILD_FOLDER%\build && 7z a %APPVEYOR_BUILD_FOLDER%\mmseqs-win64.zip mmseqs
on_success:
- ps: >-
if (Test-Path "C:\Users\appveyor\.ssh\id_rsa") {
Remove-Item "C:\Users\appveyor\.ssh\id_rsa"
}
if(($env:appveyor_repo_branch -eq 'master') -and (-not (Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER))) {
$fileContent = "-----BEGIN RSA PRIVATE KEY-----`n"
$fileContent += $env:PKEY.Replace(' ', "`n")
$fileContent += "`n-----END RSA PRIVATE KEY-----`n"
Set-Content "C:\Users\appveyor\.ssh\id_rsa" $fileContent
}
- if exist C:\Users\appveyor\.ssh\id_rsa ( ssh -o StrictHostKeyChecking=no codeship@uniclust.mmseqs.com "mkdir -p /home/mirdita/repositories/mmseqs-webserver/archive/%APPVEYOR_REPO_COMMIT%" )
- if exist C:\Users\appveyor\.ssh\id_rsa ( cd %APPVEYOR_BUILD_FOLDER% && scp -o StrictHostKeyChecking=no mmseqs-win64.zip codeship@uniclust.mmseqs.com:/home/mirdita/repositories/mmseqs-webserver/archive/%APPVEYOR_REPO_COMMIT% )
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml
trigger:
- master
- master
pool:
vmImage: 'Ubuntu-16.04'
strategy:
matrix:
avx2:
SIMD: 'AVX2'
FILENAME: 'mmseqs-static_avx2.tar.gz'
STATIC: 1
MPI: 0
sse:
SIMD: 'SSE4.1'
FILENAME: 'mmseqs-static_sse41.tar.gz'
STATIC: 1
MPI: 0
avx2_mpi:
SIMD: 'AVX2'
STATIC: 0
FILENAME: ''
MPI: 1
variables:
regression: 1
steps:
- script: |
sudo apt-get update
sudo apt-get -y install pandoc mpi-default-dev mpi-default-bin texlive-latex-recommended texlive-fonts-extra
displayName: 'Install dependencies'
jobs:
- job: build_ubuntu_1604_userguide
displayName: Ubuntu 1604 Userguide
pool:
vmImage: 'Ubuntu-16.04'
steps:
- checkout: "none"
- script: |
sudo apt-get update
sudo apt-get -y install pandoc texlive-latex-recommended texlive-fonts-extra
displayName: Install Dependencies
- script: |
cd ${SYSTEM_DEFAULTWORKINGDIRECTORY}
git clone https://github.com/soedinglab/MMseqs2.wiki.git .
.pandoc/make-pdf.sh
displayName: Build Userguide
- task: PublishPipelineArtifact@0
inputs:
targetPath: $(System.DefaultWorkingDirectory)/userguide.pdf
artifactName: userguide
- script: |
mkdir build
cd build
if [ "${STATIC}" -eq "1" ]; then
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=OFF \
-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc \
-static-libstdc++" -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 -DHAVE_MPI=${MPI} ..
else
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 -DHAVE_MPI=${MPI} ..
fi
- job: build_macos_1014
displayName: macOS 1014
pool:
vmImage: 'macOS-10.14'
steps:
- checkout: self
submodules: true
- script: |
brew install cmake gcc@9 zlib bzip2 coreutils
displayName: Install Dependencies
- script: |
cd ${BUILD_SOURCESDIRECTORY}
CC=gcc-9 CXX=g++-9 ./util/build_osx.sh . build
displayName: Build MMseqs2
- script: |
${BUILD_SOURCESDIRECTORY}/util/regression/run_regression.sh ${BUILD_SOURCESDIRECTORY}/build/build_sse41/src/mmseqs ${BUILD_SOURCESDIRECTORY}/regression
displayName: Run Regression Suite
condition: eq(variables['regression'], 1)
- task: PublishPipelineArtifact@0
inputs:
targetPath: $(Build.SourcesDirectory)/build/build_sse41/src/mmseqs
artifactName: mmseqs-darwin-sse41
- task: PublishPipelineArtifact@0
inputs:
targetPath: $(Build.SourcesDirectory)/build/build_avx2/src/mmseqs
artifactName: mmseqs-darwin-avx2
make -j $(nproc --all)
displayName: 'Run build'
- job: build_ubuntu_1604
displayName: Ubuntu 1604 MMseqs2
pool:
vmImage: 'Ubuntu-16.04'
timeoutInMinutes: 120
strategy:
matrix:
avx2:
SIMD: 'AVX2'
STATIC: 1
MPI: 0
BUILD_TYPE: RelWithDebInfo
sse:
SIMD: 'SSE4_1'
STATIC: 1
MPI: 0
BUILD_TYPE: RelWithDebInfo
avx2_mpi:
SIMD: 'AVX2'
STATIC: 0
MPI: 1
BUILD_TYPE: RelWithDebInfo
asan:
SIMD: 'AVX2'
STATIC: 0
MPI: 0
BUILD_TYPE: ASan
- script: |
mkdir ~/regression && cd ~/regression
git clone https://bitbucket.org/martin_steinegger/mmseqs-benchmark.git
export TTY=0
export MMSEQS_NUM_THREADS=8
export PATH="$(pwd)/mmseqs-benchmark/:$PATH"
./mmseqs-benchmark/run_regression.sh "${BUILD_SOURCESDIRECTORY}/build/src/mmseqs" ~/regression/results/
displayName: 'Run regression test'
steps:
- checkout: self
submodules: true
- script: |
sudo apt-get update
sudo apt-get -y install mpi-default-dev mpi-default-bin
displayName: Install Dependencies
condition: eq(variables['MPI'], 1)
- script: |
mkdir build && cd build
if [ "${STATIC}" -eq "1" ]; then
cmake -DHAVE_SANITIZER=1 -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DHAVE_TESTS=1 \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc \
-static-libstdc++" -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 -DHAVE_MPI=${MPI} ..
else
cmake -DHAVE_SANITIZER=1 -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DHAVE_TESTS=1 \
-DENABLE_WERROR=1 -DHAVE_${SIMD}=1 -DHAVE_MPI=${MPI} ..
fi
- task: DownloadSecureFile@1
inputs:
secureFile: secretKeyPleaseDontSteal
displayName: 'Get the deploy key'
make -j $(nproc --all)
displayName: Build MMseqs2
- script: |
export TTY=0
if [ "${BUILD_TYPE}" = "ASan" ]; then
echo "leak:libgomp1" > ${BUILD_SOURCESDIRECTORY}/ASan.supp
export ASAN_OPTIONS=suppressions=${BUILD_SOURCESDIRECTORY}/ASan.supp
fi
${BUILD_SOURCESDIRECTORY}/util/regression/run_regression.sh ${BUILD_SOURCESDIRECTORY}/build/src/mmseqs ${BUILD_SOURCESDIRECTORY}/regression
displayName: Run Regression Suite
condition: eq(variables['regression'], 1)
- task: PublishPipelineArtifact@0
condition: eq(variables['STATIC'], 1)
inputs:
targetPath: $(Build.SourcesDirectory)/build/src/mmseqs
artifactName: mmseqs-linux-$(SIMD)
- script: |
[ ! -z "${FILENAME}" ] || exit 0
mkdir ~/.ssh && mv $DOWNLOADSECUREFILE_SECUREFILEPATH ~/.ssh/id_rsa
chmod 700 ~/.ssh && chmod 600 ~/.ssh/id_rsa
ssh-keyscan -t rsa uniclust.mmseqs.com >> ~/.ssh/known_hosts
ssh codeship@uniclust.mmseqs.com "mkdir -p \"/home/mirdita/repositories/mmseqs-webserver/archive/${BUILD_SOURCEVERSION}\""
mkdir -p ~/mmseqs2.wiki
cd ~/mmseqs2.wiki
git clone https://github.com/soedinglab/MMseqs2.wiki.git .
.pandoc/make-pdf.sh
scp userguide.pdf codeship@uniclust.mmseqs.com:"/home/mirdita/repositories/mmseqs-webserver/archive/${BUILD_SOURCEVERSION}"
cd ${BUILD_SOURCESDIRECTORY}/build
CURR_BUILD="mmseqs2"
mkdir -p ${CURR_BUILD}/bin
mkdir -p ${CURR_BUILD}/util
mkdir -p ${CURR_BUILD}
cp src/mmseqs ${CURR_BUILD}/bin
chmod +x ${CURR_BUILD}/bin/mmseqs
cp ../util/bash-completion.sh ${CURR_BUILD}/util
chmod +x ${CURR_BUILD}/util/bash-completion.sh
cp -r ../LICENCE.md ../README.md ~/mmseqs2.wiki/userguide.pdf ../examples ${CURR_BUILD}
chmod -R g-w,o-w ${CURR_BUILD}
tar czvf ${FILENAME} ${CURR_BUILD}
scp ${FILENAME} codeship@uniclust.mmseqs.com:"/home/mirdita/repositories/mmseqs-webserver/archive/${BUILD_SOURCEVERSION}"
ssh codeship@uniclust.mmseqs.com "update-latest-mmseqs.sh \"${BUILD_SOURCEVERSION}\""
displayName: 'Upload build'
- job: build_windows_2019
displayName: Windows 2019
pool:
vmImage: 'windows-2019'
variables:
CYGWIN_ROOT: $(System.Workfolder)\cygwin
CYGWIN_MIRROR: http://cygwin.mirror.constant.com
timeoutInMinutes: 120
steps:
- powershell: git config --global core.autocrlf false
displayName: core.autocrlf false
- checkout: self
submodules: true
- script: |
rmdir /s /q C:\Strawberry
displayName: Remove Strawberry Perl (Conflict with Cygwin)
- script: |
choco install cygwin --params="/InstallDir:%CYGWIN_ROOT%"
displayName: Install Cygwin
- script: |
%CYGWIN_ROOT%\cygwinsetup.exe -qnNdO -R "%CYGWIN_ROOT%" -s "%CYGWIN_MIRROR%" -g -P ^
bash,^
xxd,^
cmake,^
make,^
gcc-core,^
gcc-g++,^
zlib-devel,^
libbz2-devel,^
busybox-standalone,^
git,^
binutils,^
wget
displayName: Install Dependencies
- script: |
%CYGWIN_ROOT%\bin\bash.exe -cl "${BUILD_SOURCESDIRECTORY}/util/build_windows.sh $(cygpath ${BUILD_SOURCESDIRECTORY}) $(cygpath ${BUILD_SOURCESDIRECTORY}/build)"
displayName: Build MMseqs2
- task: "ArchiveFiles@2"
inputs:
rootFolderOrFile: $(Build.SourcesDirectory)\build\mmseqs
archiveFile: $(Build.SourcesDirectory)\mmseqs-win64.zip
includeRootFolder: true
archiveType: zip
- task: PublishPipelineArtifact@0
inputs:
targetPath: $(Build.SourcesDirectory)\mmseqs-win64.zip
artifactName: mmseqs-win64
- script: |
%BUILD_SOURCESDIRECTORY%\build\mmseqs\mmseqs.bat version
displayName: Setup Busybox
- script: |
%CYGWIN_ROOT%\bin\bash.exe -cl "${BUILD_SOURCESDIRECTORY}/util/regression/run_regression.sh $(cygpath ${BUILD_SOURCESDIRECTORY}/build/mmseqs/bin/mmseqs.exe) $(cygpath ${BUILD_SOURCESDIRECTORY}/regression)"
displayName: Run Regression Suite
condition: eq(variables['regression'], 1)
- job: upload_artifacts
displayName: Upload Artifacts
condition: and(succeeded(), ne(variables['Build.Reason'], 'PullRequest'))
pool:
vmImage: 'Ubuntu-16.04'
dependsOn:
- build_ubuntu_1604_userguide
- build_macos_1014
- build_ubuntu_1604
- build_windows_2019
steps:
- script: |
cd "${BUILD_SOURCESDIRECTORY}"
mkdir mmseqs
cp -f README.md LICENCE.md mmseqs
cp -r examples mmseqs
mkdir mmseqs/matrices
cp -f data/*.out mmseqs/matrices
mkdir mmseqs/util
cp -f util/bash-completion.sh mmseqs/util
mkdir mmseqs/bin
- task: DownloadPipelineArtifact@1
inputs:
artifactName: userguide
targetPath: $(Build.SourcesDirectory)/mmseqs
- task: DownloadPipelineArtifact@1
inputs:
artifactName: mmseqs-darwin-sse41
targetPath: $(Build.SourcesDirectory)/mmseqs/bin
- script:
chmod +x "${BUILD_SOURCESDIRECTORY}/mmseqs/bin/mmseqs"
- task: ArchiveFiles@2
inputs:
rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs
archiveFile: $(Build.SourcesDirectory)/mmseqs-osx-sse41.tar.gz
includeRootFolder: true
archiveType: tar
- task: DownloadPipelineArtifact@1
inputs:
artifactName: mmseqs-darwin-avx2
targetPath: $(Build.SourcesDirectory)/mmseqs/bin
- script:
chmod +x "${BUILD_SOURCESDIRECTORY}/mmseqs/bin/mmseqs"
- task: ArchiveFiles@2
inputs:
rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs
archiveFile: $(Build.SourcesDirectory)/mmseqs-osx-avx2.tar.gz
includeRootFolder: true
archiveType: tar
- task: DownloadPipelineArtifact@1
inputs:
artifactName: mmseqs-linux-SSE4_1
targetPath: $(Build.SourcesDirectory)/mmseqs/bin
- script:
chmod +x "${BUILD_SOURCESDIRECTORY}/mmseqs/bin/mmseqs"
- task: ArchiveFiles@2
inputs:
rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs
archiveFile: $(Build.SourcesDirectory)/mmseqs-linux-sse41.tar.gz
includeRootFolder: true
archiveType: tar
- task: DownloadPipelineArtifact@1
inputs:
artifactName: mmseqs-linux-AVX2
targetPath: $(Build.SourcesDirectory)/mmseqs/bin
- script:
chmod +x "${BUILD_SOURCESDIRECTORY}/mmseqs/bin/mmseqs"
- task: ArchiveFiles@2
inputs:
rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs
archiveFile: $(Build.SourcesDirectory)/mmseqs-linux-avx2.tar.gz
includeRootFolder: true
archiveType: tar
- script:
rm "${BUILD_SOURCESDIRECTORY}/mmseqs/bin/mmseqs"
- task: DownloadPipelineArtifact@1
inputs:
artifactName: mmseqs-win64
targetPath: $(Build.SourcesDirectory)
- script: |
unzip "${BUILD_SOURCESDIRECTORY}/mmseqs-win64.zip"
chmod +x mmseqs/mmseqs.bat mmseqs/bin/*
- task: ArchiveFiles@2
inputs:
rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs
archiveFile: $(Build.SourcesDirectory)/mmseqs-win64.zip
includeRootFolder: true
archiveType: zip
- task: DownloadSecureFile@1
inputs:
secureFile: secretKeyPleaseDontSteal
displayName: Get Deployment Key
- script: |
mkdir ~/.ssh && mv $DOWNLOADSECUREFILE_SECUREFILEPATH ~/.ssh/id_rsa
chmod 700 ~/.ssh && chmod 600 ~/.ssh/id_rsa
ssh-keyscan -t rsa uniclust.mmseqs.com >> ~/.ssh/known_hosts
cd "${BUILD_SOURCESDIRECTORY}"
ssh codeship@uniclust.mmseqs.com "mkdir -p \"/home/mirdita/repositories/mmseqs-webserver/archive/${BUILD_SOURCEVERSION}\""
scp mmseqs/userguide.pdf mmseqs-osx-sse41.tar.gz mmseqs-osx-avx2.tar.gz mmseqs-linux-sse41.tar.gz mmseqs-linux-avx2.tar.gz mmseqs-win64.zip codeship@uniclust.mmseqs.com:"/home/mirdita/repositories/mmseqs-webserver/archive/${BUILD_SOURCEVERSION}"
ssh codeship@uniclust.mmseqs.com "update-latest-mmseqs.sh \"${BUILD_SOURCEVERSION}\""
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
macro(CHECK_SYMBOL_EXISTS SYMBOL FILES VARIABLE)
if(CMAKE_C_COMPILER_LOADED)
__CHECK_SYMBOL_EXISTS_IMPL("${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/CheckSymbolExists.c" "${SYMBOL}" "${FILES}" "${VARIABLE}" )
elseif(CMAKE_CXX_COMPILER_LOADED)
__CHECK_SYMBOL_EXISTS_IMPL("${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/CheckSymbolExists.cxx" "${SYMBOL}" "${FILES}" "${VARIABLE}" )
else()
message(FATAL_ERROR "CHECK_SYMBOL_EXISTS needs either C or CXX language enabled")
endif()
endmacro()
macro(__CHECK_SYMBOL_EXISTS_IMPL SOURCEFILE SYMBOL FILES VARIABLE)
if(NOT DEFINED "${VARIABLE}" OR "x${${VARIABLE}}" STREQUAL "x${VARIABLE}")
set(CMAKE_CONFIGURABLE_FILE_CONTENT "/* */\n")
set(MACRO_CHECK_SYMBOL_EXISTS_FLAGS ${CMAKE_REQUIRED_FLAGS})
if(CMAKE_REQUIRED_LIBRARIES)
set(CHECK_SYMBOL_EXISTS_LIBS
LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
else()
set(CHECK_SYMBOL_EXISTS_LIBS)
endif()
if(CMAKE_REQUIRED_INCLUDES)
set(CMAKE_SYMBOL_EXISTS_INCLUDES
"-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}")
else()
set(CMAKE_SYMBOL_EXISTS_INCLUDES)
endif()
foreach(FILE ${FILES})
set(CMAKE_CONFIGURABLE_FILE_CONTENT ${CMAKE_CONFIGURABLE_FILE_CONTENT} "#include <${FILE}>\n")
endforeach()
set(CMAKE_CONFIGURABLE_FILE_CONTENT
${CMAKE_CONFIGURABLE_FILE_CONTENT} "\nint main(int argc, char** argv)\n{\n (void)argv;\n#ifndef ${SYMBOL}\n return ((int*)(&${SYMBOL}))[argc];\n#else\n (void)argc;\n return 0;\n#endif\n}\n")
configure_file("${CMAKE_ROOT}/Modules/CMakeConfigurableFile.in"
"${SOURCEFILE}" @ONLY)
if(NOT CMAKE_REQUIRED_QUIET)
message(STATUS "Looking for ${SYMBOL}")
endif()
try_compile(${VARIABLE}
${CMAKE_BINARY_DIR}
"${SOURCEFILE}"
COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
${CHECK_SYMBOL_EXISTS_LIBS}
CMAKE_FLAGS
-DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_SYMBOL_EXISTS_FLAGS}
"${CMAKE_SYMBOL_EXISTS_INCLUDES}"
OUTPUT_VARIABLE OUTPUT)
if(${VARIABLE})
if(NOT CMAKE_REQUIRED_QUIET)
message(STATUS "Looking for ${SYMBOL} - found")
endif()
set(${VARIABLE} 1 CACHE INTERNAL "Have symbol ${SYMBOL}")
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
"Determining if the ${SYMBOL} "
"exist passed with the following output:\n"
"${OUTPUT}\nFile ${SOURCEFILE}:\n"
"${CMAKE_CONFIGURABLE_FILE_CONTENT}\n")
else()
if(NOT CMAKE_REQUIRED_QUIET)
message(STATUS "Looking for ${SYMBOL} - not found")
endif()
set(${VARIABLE} "" CACHE INTERNAL "Have symbol ${SYMBOL}")
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
"Determining if the ${SYMBOL} "
"exist failed with the following output:\n"
"${OUTPUT}\nFile ${SOURCEFILE}:\n"
"${CMAKE_CONFIGURABLE_FILE_CONTENT}\n")
endif()
endif()
endmacro()
......@@ -34,7 +34,6 @@ include(CheckCXXCompilerFlag)
# Set -Werror to catch "argument unused during compilation" warnings
set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass
check_cxx_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS)
unset(CMAKE_REQUIRED_FLAGS)
if(HAVE_FLAG_SANITIZE_ADDRESS)
......@@ -48,12 +47,19 @@ else(NOT ADDRESS_SANITIZER_FLAG)
set(HAVE_ADDRESS_SANITIZER FALSE)
endif()
check_cxx_compiler_flag("-Og" HAVE_OPTIMIZE_DEBUG)
if(HAVE_OPTIMIZE_DEBUG)
set(OPTIMIZE_DEBUG_FLAG "-Og")
else()
set(OPTIMIZE_DEBUG_FLAG "-O0")
endif()
set(HAVE_ADDRESS_SANITIZER TRUE)
set(CMAKE_C_FLAGS_ASAN "-O0 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
set(CMAKE_C_FLAGS_ASAN "${OPTIMIZE_DEBUG_FLAG} -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
CACHE STRING "Flags used by the C compiler during ASan builds."
FORCE)
set(CMAKE_CXX_FLAGS_ASAN "-O0 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
set(CMAKE_CXX_FLAGS_ASAN "${OPTIMIZE_DEBUG_FLAG} -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
CACHE STRING "Flags used by the C++ compiler during ASan builds."
FORCE)
set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
......
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#.rst:
# FindBZip2
# ---------
#
# Try to find BZip2
#
# IMPORTED Targets
# ^^^^^^^^^^^^^^^^
#
# This module defines :prop_tgt:`IMPORTED` target ``BZip2::BZip2``, if
# BZip2 has been found.
#
# Result Variables
# ^^^^^^^^^^^^^^^^
#
# This module defines the following variables:
#
# ::
#
# BZIP2_FOUND - system has BZip2
# BZIP2_INCLUDE_DIR - the BZip2 include directory
# BZIP2_LIBRARIES - Link these to use BZip2
# BZIP2_NEED_PREFIX - this is set if the functions are prefixed with BZ2_
# BZIP2_VERSION_STRING - the version of BZip2 found (since CMake 2.8.8)
set(_BZIP2_PATHS PATHS
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Bzip2;InstallPath]"
)
find_path(BZIP2_INCLUDE_DIR bzlib.h ${_BZIP2_PATHS} PATH_SUFFIXES include)
if (NOT BZIP2_LIBRARIES)
find_library(BZIP2_LIBRARY_RELEASE NAMES bz2 bzip2 ${_BZIP2_PATHS} PATH_SUFFIXES lib)
find_library(BZIP2_LIBRARY_DEBUG NAMES bz2d bzip2d ${_BZIP2_PATHS} PATH_SUFFIXES lib)
include(SelectLibraryConfigurations)
SELECT_LIBRARY_CONFIGURATIONS(BZIP2)
endif ()
if (BZIP2_INCLUDE_DIR AND EXISTS "${BZIP2_INCLUDE_DIR}/bzlib.h")
file(STRINGS "${BZIP2_INCLUDE_DIR}/bzlib.h" BZLIB_H REGEX "bzip2/libbzip2 version [0-9]+\\.[^ ]+ of [0-9]+ ")
string(REGEX REPLACE ".* bzip2/libbzip2 version ([0-9]+\\.[^ ]+) of [0-9]+ .*" "\\1" BZIP2_VERSION_STRING "${BZLIB_H}")
endif ()
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(BZip2
REQUIRED_VARS BZIP2_LIBRARIES BZIP2_INCLUDE_DIR
VERSION_VAR BZIP2_VERSION_STRING)
if (BZIP2_FOUND)
include(CheckSymbolExists)
include(CMakePushCheckState)
cmake_push_check_state()
set(CMAKE_REQUIRED_QUIET ${BZip2_FIND_QUIETLY})
set(CMAKE_REQUIRED_INCLUDES ${BZIP2_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES ${BZIP2_LIBRARIES})
CHECK_SYMBOL_EXISTS(BZ2_bzCompressInit "bzlib.h" BZIP2_NEED_PREFIX)
cmake_pop_check_state()
if(NOT TARGET BZip2::BZip2)
add_library(BZip2::BZip2 UNKNOWN IMPORTED)
set_target_properties(BZip2::BZip2 PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIRS}")
if(BZIP2_LIBRARY_RELEASE)
set_property(TARGET BZip2::BZip2 APPEND PROPERTY
IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(BZip2::BZip2 PROPERTIES
IMPORTED_LOCATION_RELEASE "${BZIP2_LIBRARY_RELEASE}")
endif()
if(BZIP2_LIBRARY_DEBUG)
set_property(TARGET BZip2::BZip2 APPEND PROPERTY
IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(BZip2::BZip2 PROPERTIES
IMPORTED_LOCATION_DEBUG "${BZIP2_LIBRARY_DEBUG}")
endif()
if(NOT BZIP2_LIBRARY_RELEASE AND NOT BZIP2_LIBRARY_DEBUG)
set_property(TARGET BZip2::BZip2 APPEND PROPERTY
IMPORTED_LOCATION "${BZIP2_LIBRARY}")
endif()
endif()
endif ()
mark_as_advanced(BZIP2_INCLUDE_DIR)
......@@ -26,8 +26,8 @@ set(COMPILED_RESOURCES
nucleotide.out
blosum62.out
PAM30.out
CovSeqidQscPercMinDiag.out
CovSeqidQscPercMinDiagTargetCov.out
CovSeqidQscPercMinDiag.lib
CovSeqidQscPercMinDiagTargetCov.lib
ExpOpt3_8_polished.cs32.lib
Library255_may17.lib
libPure_blosum62_255.lib
......
......@@ -11,12 +11,12 @@ notExists() {
#pre processing
[ -z "$MMSEQS" ] && echo "Please set the environment variable \$MMSEQS to your MMSEQS binary." && exit 1;
# check amount of input variables
# check number of input variables
[ "$#" -ne 4 ] && echo "Please provide <queryDB> <targetDB> <outDB> <tmp>" && exit 1;
# check if files exists
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ ! -f "$2" ] && echo "$2 not found!" && exit 1;
[ -f "$3.dbtype" ] && echo "$3 exists already!" && exit 1;
# check if files exist
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ ! -f "$2.dbtype" ] && echo "$2.dbtype not found!" && exit 1;
[ -f "$3.dbtype" ] && echo "$3.dbtype exists already!" && exit 1;
[ ! -d "$4" ] && echo "tmp directory $4 not found!" && mkdir -p "$4";
......
......@@ -9,24 +9,20 @@ notExists() {
[ ! -f "$1" ]
}
#pre processing
[ -z "$MMSEQS" ] && echo "Please set the environment variable \$MMSEQS to your MMSEQS binary." && exit 1;
# check amount of input variables
# check number of input variables
[ "$#" -ne 4 ] && echo "Please provide <queryDB> <targetDB> <outDB> <tmp>" && exit 1;
# check if files exists
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ ! -f "$2" ] && echo "$2 not found!" && exit 1;
[ -f "$3.dbtype" ] && echo "$3 exists already!" && exit 1;
# check if files exist
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ ! -f "$2.dbtype" ] && echo "$2.dbtype not found!" && exit 1;
[ -f "$3.dbtype" ] && echo "$3.dbtype exists already!" && exit 1;
[ ! -d "$4" ] && echo "tmp directory $4 not found!" && mkdir -p "$4";
INPUT="$1"
TARGET="$2"
TMP_PATH="$4"
STEP=0
STEPS=${STEPS:-1}
STEPS="${STEPS:-1}"
ALN_RES_MERGE="$TMP_PATH/aln_0"
while [ "$STEP" -lt "$STEPS" ]; do
SENS_PARAM=SENSE_${STEP}
......@@ -45,6 +41,7 @@ while [ "$STEP" -lt "$STEPS" ]; do
$RUNNER "$MMSEQS" "${ALIGN_MODULE}" "$INPUT" "$TARGET${ALIGNMENT_DB_EXT}" "$TMP_PATH/pref_$STEP" "$3" $ALIGNMENT_PAR \
|| fail "Alignment died"
fi
break
else
if notExists "$TMP_PATH/aln_$STEP.dbtype"; then
# shellcheck disable=SC2086
......@@ -57,29 +54,43 @@ while [ "$STEP" -lt "$STEPS" ]; do
if [ "$STEP" -gt 0 ]; then
if notExists "$TMP_PATH/aln_${SENS}.hasmerged"; then
if [ "$STEP" -lt $((STEPS-1)) ]; then
"$MMSEQS" mergedbs "$1" "$TMP_PATH/aln_merge" "$ALN_RES_MERGE" "$TMP_PATH/aln_$STEP" \
|| fail "Mergedbs died"
ALN_RES_MERGE="$TMP_PATH/aln_merge"
# shellcheck disable=SC2086
"$MMSEQS" mergedbs "$1" "$TMP_PATH/aln_merge_new" "$ALN_RES_MERGE" "$TMP_PATH/aln_$STEP" ${VERB_COMP_PAR} \
|| fail "Mergedbs died"
# shellcheck disable=SC2086
"$MMSEQS" rmdb "$TMP_PATH/aln_merge" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" mvdb "$TMP_PATH/aln_merge_new" "$TMP_PATH/aln_merge" ${VERBOSITY}
else
"$MMSEQS" mergedbs "$1" "$3" "$ALN_RES_MERGE" "$TMP_PATH/aln_$STEP" \
|| fail "Mergedbs died"
# shellcheck disable=SC2086
"$MMSEQS" mergedbs "$1" "$3" "$ALN_RES_MERGE" "$TMP_PATH/aln_$STEP" ${VERB_COMP_PAR} \
|| fail "Mergedbs died"
break
fi
touch "$TMP_PATH/aln_${STEP}.hasmerged"
fi
fi
if [ "$STEP" -gt 0 ]; then
ALN_RES_MERGE="$TMP_PATH/aln_merge"
fi
NEXTINPUT="$TMP_PATH/input_step$STEP"
NEXTINPUT="$TMP_PATH/input_$STEP"
#do not create subdb at last step
if [ "$STEP" -lt "$((STEPS-1))" ]; then
if notExists "$TMP_PATH/order_step$STEP.dbtype"; then
awk '$3 < 2 { print $1 }' "$TMP_PATH/aln_$STEP.index" > "$TMP_PATH/order_step$STEP" \
if notExists "$TMP_PATH/order_$STEP.dbtype"; then
awk '$3 < 2 { print $1 }' "$TMP_PATH/aln_$STEP.index" > "$TMP_PATH/order_$STEP" \
|| fail "Awk step $STEP died"
fi
if [ ! -s "$TMP_PATH/order_step$STEP" ]; then break; fi
if [ ! -s "$TMP_PATH/order_$STEP" ]; then
# shellcheck disable=SC2086
"$MMSEQS" mvdb "$ALN_RES_MERGE" "$3" ${VERBOSITY}
break
fi
if notExists "$NEXTINPUT.dbtype"; then
"$MMSEQS" createsubdb "$TMP_PATH/order_step$STEP" "$INPUT" "$NEXTINPUT" \
# shellcheck disable=SC2086
"$MMSEQS" createsubdb "$TMP_PATH/order_$STEP" "$INPUT" "$NEXTINPUT" ${VERBOSITY} --subdb-mode 1 \
|| fail "Order step $STEP died"
fi
fi
......@@ -87,17 +98,21 @@ while [ "$STEP" -lt "$STEPS" ]; do
STEP="$((STEP+1))"
done
if [ -n "$REMOVE_TMP" ]; then
echo "Remove temporary files"
STEP=0
while [ "$STEP" -lt "$STEPS" ]; do
"$MMSEQS" rmdb "${TMP_PATH}/pref_$STEP"
"$MMSEQS" rmdb "${TMP_PATH}/aln_$STEP"
"$MMSEQS" rmdb "${TMP_PATH}/input_step$STEP"
#NEXTINPUT="$TMP_PATH/input_step$STEP" # this line is unused
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/pref_$STEP" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/aln_$STEP" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/input_$STEP" ${VERBOSITY}
rm -f "${TMP_PATH}/order_$STEP"
STEP="$((STEP+1))"
done
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/aln_merge" ${VERBOSITY}
rm -f "$TMP_PATH/blastp.sh"
fi
......
......@@ -11,13 +11,13 @@ notExists() {
#pre processing
[ -z "$MMSEQS" ] && echo "Please set the environment variable \$MMSEQS to your MMSEQS binary." && exit 1;
# check amount of input variables
# check number of input variables
[ "$#" -ne 4 ] && echo "Please provide <queryDB> <targetDB> <outDB> <tmp>" && exit 1;
# check if files exists
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ ! -f "$2" ] && echo "$2 not found!" && exit 1;
[ -f "$3.dbtype" ] && echo "$3 exists already!" && exit 1;
[ ! -d "$4" ] && echo "tmp directory $4 not found!" && mkdir -p "$4";
# check if files exist
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ ! -f "$2.dbtype" ] && echo "$2.dbtype not found!" && exit 1;
[ -f "$3.dbtype" ] && echo "$3.dbtype exists already!" && exit 1;
[ ! -d "$4" ] && echo "tmp directory $4 not found!" && mkdir -p "$4";
QUERYDB="$1"
TMP_PATH="$4"
......
......@@ -8,12 +8,27 @@ notExists() {
[ ! -f "$1" ]
}
# check amount of input variables
abspath() {
if [ -d "$1" ]; then
(cd "$1"; pwd)
elif [ -f "$1" ]; then
if [ -z "${1##*/*}" ]; then
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
else
echo "$(pwd)/$1"
fi
elif [ -d "$(dirname "$1")" ]; then
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
fi
}
# check number of input variables
[ "$#" -ne 3 ] && echo "Please provide <sequenceDB> <outDB> <tmp>" && exit 1;
# check if files exists
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ -f "$2.dbtype" ] && echo "$2 exists already!" && exit 1;
[ ! -d "$3" ] && echo "tmp directory $3 not found!" && mkdir -p "$3";
# check if files exist
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ -f "$2.dbtype" ] && echo "$2.dbtype exists already!" && exit 1;
[ ! -d "$3" ] && echo "tmp directory $3 not found!" && mkdir -p "$3";
INPUT="$1"
TMP_PATH="$3"
......@@ -27,7 +42,8 @@ if notExists "${TMP_PATH}/clu_redundancy.dbtype"; then
fi
if notExists "${TMP_PATH}/input_step_redundancy.dbtype"; then
"$MMSEQS" createsubdb "${TMP_PATH}/clu_redundancy" "$INPUT" "${TMP_PATH}/input_step_redundancy" \
# shellcheck disable=SC2086
"$MMSEQS" createsubdb "${TMP_PATH}/clu_redundancy" "$INPUT" "${TMP_PATH}/input_step_redundancy" ${VERBOSITY} --subdb-mode 1 \
|| fail "createsubdb died"
fi
......@@ -75,7 +91,8 @@ while [ "$STEP" -lt "$STEPS" ]; do
fi
else
if notExists "$NEXTINPUT.dbtype"; then
"$MMSEQS" createsubdb "${TMP_PATH}/clu_step$STEP" "$INPUT" "$NEXTINPUT" \
# shellcheck disable=SC2086
"$MMSEQS" createsubdb "${TMP_PATH}/clu_step$STEP" "$INPUT" "$NEXTINPUT" ${VERBOSITY} --subdb-mode 1 \
|| fail "Order step $STEP died"
fi
fi
......@@ -88,59 +105,145 @@ if [ -n "$REASSIGN" ]; then
STEP=$((STEP-1))
PARAM=ALIGNMENT${STEP}_PAR
eval ALIGNMENT_PAR="\$$PARAM"
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" "${ALIGN_MODULE}" "$SOURCE" "$SOURCE" "${TMP_PATH}/clu" "${TMP_PATH}/aln" ${ALIGNMENT_REASSIGN_PAR} \
|| fail "align1 reassign died"
"$MMSEQS" subtractdbs "${TMP_PATH}/clu" "${TMP_PATH}/aln" "${TMP_PATH}/clu_not_accepted" --e-profile 100000 \
|| fail "subtractdbs1 reassign died"
"$MMSEQS" subtractdbs "${TMP_PATH}/clu" "${TMP_PATH}/clu_not_accepted" "${TMP_PATH}/clu_accepted" --e-profile 100000 \
|| fail "subtractdbs2 reassign died"
"$MMSEQS" swapdb "${TMP_PATH}/clu_not_accepted" "${TMP_PATH}/clu_not_accepted_swap" \
|| fail "swapdb1 reassign died"
"$MMSEQS" createsubdb "${TMP_PATH}/clu_not_accepted_swap" "$SOURCE" "${TMP_PATH}/seq_wrong_assigned" \
|| fail "createsubdb1 reassign died"
"$MMSEQS" createsubdb "${TMP_PATH}/clu" "$SOURCE" "${TMP_PATH}/seq_seeds" \
|| fail "createsubdb2 reassign died"
# align to cluster sequences
if notExists "${TMP_PATH}/aln.dbtype"; then
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" "${ALIGN_MODULE}" "$SOURCE" "$SOURCE" "${TMP_PATH}/clu" "${TMP_PATH}/aln" ${ALIGNMENT_REASSIGN_PAR} \
|| fail "align1 reassign died"
fi
# create file of cluster that do not align based on given criteria
if notExists "${TMP_PATH}/clu_not_accepted.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" subtractdbs "${TMP_PATH}/clu" "${TMP_PATH}/aln" "${TMP_PATH}/clu_not_accepted" --e-profile 100000000 -e 100000000 ${THREADSANDCOMPRESS} \
|| fail "subtractdbs1 reassign died"
fi
# create file of cluster that do align based on given criteria
if notExists "${TMP_PATH}/clu_accepted.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" subtractdbs "${TMP_PATH}/clu" "${TMP_PATH}/clu_not_accepted" "${TMP_PATH}/clu_accepted" --e-profile 100000000 -e 100000000 ${THREADSANDCOMPRESS} \
|| fail "subtractdbs2 reassign died"
fi
if notExists "${TMP_PATH}/clu_not_accepted_swap.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" swapdb "${TMP_PATH}/clu_not_accepted" "${TMP_PATH}/clu_not_accepted_swap" ${THREADSANDCOMPRESS} \
|| fail "swapdb1 reassign died"
fi
# create sequences database that were wrong assigned
if notExists "${TMP_PATH}/seq_wrong_assigned.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" createsubdb "${TMP_PATH}/clu_not_accepted_swap" "$SOURCE" "${TMP_PATH}/seq_wrong_assigned" ${VERBOSITY} \
|| fail "createsubdb1 reassign died"
fi
# build seed sequences
if notExists "${TMP_PATH}/seq_seeds.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" createsubdb "${TMP_PATH}/clu" "$SOURCE" "${TMP_PATH}/seq_seeds" ${VERBOSITY} \
|| fail "createsubdb2 reassign died"
fi
PARAM=PREFILTER${STEP}_PAR
eval PREFILTER_PAR="\$$PARAM"
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" prefilter "${TMP_PATH}/seq_wrong_assigned" "${TMP_PATH}/seq_seeds" "${TMP_PATH}/seq_wrong_assigned_pref" ${PREFILTER_PAR} \
|| fail "Prefilter reassign died"
"$MMSEQS" swapdb "${TMP_PATH}/seq_wrong_assigned_pref" "${TMP_PATH}/seq_wrong_assigned_pref_swaped" \
|| fail "swapdb2 reassign died"
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" "${ALIGN_MODULE}" "${TMP_PATH}/seq_seeds" "${TMP_PATH}/seq_wrong_assigned" \
"${TMP_PATH}/seq_wrong_assigned_pref_swaped" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln" ${ALIGNMENT_REASSIGN_PAR} \
|| fail "align2 reassign died"
"$MMSEQS" swapdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped" \
|| fail "swapdb3 reassign died"
"$MMSEQS" filterdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped_top1" --extract-lines 1 \
|| fail "filterdb1 reassign died"
"$MMSEQS" filterdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped_top1" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped_top1_ocol" --trim-to-one-column \
# try to find the best matching centroid sequences for the previously wrongly assigned sequences
if notExists "${TMP_PATH}/seq_wrong_assigned_pref.dbtype"; then
# combine seq dbs
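# the merged DB is assembled without copying sequence data: MAXOFFSET is the total
# data size of seq_seeds (offset + length of its last index entry), the offsets of the
# second index are shifted by it, and both data files are exposed as numbered parts
# (.0 and .1) of the merged DB through symlinks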
MAXOFFSET=$(awk '($2+$3) > max{max=$2+$3}END{print max}' "${TMP_PATH}/seq_seeds.index")
awk -v OFFSET="${MAXOFFSET}" 'FNR==NR{print $0; next}{print $1"\t"$2+OFFSET"\t"$3}' "${TMP_PATH}/seq_seeds.index" \
"${TMP_PATH}/seq_wrong_assigned.index" > "${TMP_PATH}/seq_seeds.merged.index"
ln -s "$(abspath "${TMP_PATH}/seq_seeds")" "${TMP_PATH}/seq_seeds.merged.0"
ln -s "$(abspath "${TMP_PATH}/seq_wrong_assigned")" "${TMP_PATH}/seq_seeds.merged.1"
cp "${TMP_PATH}/seq_seeds.dbtype" "${TMP_PATH}/seq_seeds.merged.dbtype"
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" prefilter "${TMP_PATH}/seq_wrong_assigned" "${TMP_PATH}/seq_seeds.merged" "${TMP_PATH}/seq_wrong_assigned_pref" ${PREFILTER_REASSIGN_PAR} \
|| fail "Prefilter reassign died"
fi
if notExists "${TMP_PATH}/seq_wrong_assigned_pref_swaped.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" swapdb "${TMP_PATH}/seq_wrong_assigned_pref" "${TMP_PATH}/seq_wrong_assigned_pref_swaped" ${THREADSANDCOMPRESS} \
|| fail "swapdb2 reassign died"
fi
if notExists "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln.dbtype"; then
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" "${ALIGN_MODULE}" "${TMP_PATH}/seq_seeds.merged" "${TMP_PATH}/seq_wrong_assigned" \
"${TMP_PATH}/seq_wrong_assigned_pref_swaped" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln" ${ALIGNMENT_REASSIGN_PAR} \
|| fail "align2 reassign died"
fi
if notExists "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_ocol.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" filterdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_ocol" --trim-to-one-column ${THREADSANDCOMPRESS} \
|| fail "filterdb2 reassign died"
"$MMSEQS" swapdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped_top1_ocol" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped_top1_ocol_swaped" \
|| fail "swapdb2 reassign died"
"$MMSEQS" mergedbs "$SOURCE" "$2" "${TMP_PATH}/clu_accepted" "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_swaped_top1_ocol_swaped" \
fi
if notExists "${TMP_PATH}/clu_accepted_plus_wrong.dbtype"; then
# combine clusters
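# mergedbs concatenates, for each key of the reference DB (seq_seeds.merged), the
# entries found under that key in the listed result DBs into a single cluster DB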
# shellcheck disable=SC2086
"$MMSEQS" mergedbs "${TMP_PATH}/seq_seeds.merged" "${TMP_PATH}/clu_accepted_plus_wrong" "${TMP_PATH}/clu_accepted" \
"${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_ocol" \
|| fail "mergedbs reassign died"
fi
if notExists "${TMP_PATH}/missing.single.seqs.db.dbtype"; then
awk 'FNR==NR{if($3 > 1){ f[$1]=1; }next} !($1 in f){print $1"\t"$1}' "${TMP_PATH}/clu_accepted_plus_wrong.index" "${SOURCE}.index" > "${TMP_PATH}/missing.single.seqs"
# shellcheck disable=SC2086
"$MMSEQS" tsv2db "${TMP_PATH}/missing.single.seqs" "${TMP_PATH}/missing.single.seqs.db" --output-dbtype 6 ${VERBCOMPRESS} \
|| fail "tsv2db reassign died"
fi
if notExists "${TMP_PATH}/clu_accepted_plus_wrong_plus_single.dbtype"; then
# combine clusters
# shellcheck disable=SC2086
"$MMSEQS" mergedbs "${SOURCE}" "${TMP_PATH}/clu_accepted_plus_wrong_plus_single" "${TMP_PATH}/clu_accepted_plus_wrong" \
"${TMP_PATH}/missing.single.seqs.db" \
|| fail "mergedbs2 reassign died"
fi
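# resolve the per-step clustering parameters (CLUSTER${STEP}_PAR) via indirect expansion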
PARAM=CLUSTER${STEP}_PAR
eval TMP="\$$PARAM"
# shellcheck disable=SC2086
"$MMSEQS" clust "${SOURCE}" "${TMP_PATH}/clu_accepted_plus_wrong_plus_single" "${2}" ${TMP} \
|| fail "Clustering step $STEP died"
if [ -n "$REMOVE_TMP" ]; then
echo "Remove temporary files"
"$MMSEQS" rmdb "${TMP_PATH}/aln"
"$MMSEQS" rmdb "${TMP_PATH}/clu_not_accepted"
"$MMSEQS" rmdb "${TMP_PATH}/clu_accepted"
"$MMSEQS" rmdb "${TMP_PATH}/clu_not_accepted_swap"
"$MMSEQS" rmdb "${TMP_PATH}/seq_wrong_assigned"
"$MMSEQS" rmdb "${TMP_PATH}/seq_seeds"
"$MMSEQS" rmdb "${TMP_PATH}/seq_seeds.merged"
"$MMSEQS" rmdb "${TMP_PATH}/seq_wrong_assigned_pref"
"$MMSEQS" rmdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped"
"$MMSEQS" rmdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln"
"$MMSEQS" rmdb "${TMP_PATH}/seq_wrong_assigned_pref_swaped_aln_ocol"
rm -f "${TMP_PATH}/missing.single.seqs"
rm -f "${TMP_PATH}/clu_accepted_plus_wrong.tsv"
"$MMSEQS" rmdb "${TMP_PATH}/missing.single.seqs.db"
"$MMSEQS" rmdb "${TMP_PATH}/clu_accepted_plus_wrong"
"$MMSEQS" rmdb "${TMP_PATH}/clu_accepted_plus_wrong_plus_single"
fi
fi
if [ -n "$REMOVE_TMP" ]; then
echo "Remove temporary files"
rm -f "${TMP_PATH}/order_redundancy"
"$MMSEQS" rmdb "${TMP_PATH}/clu_redundancy"
"$MMSEQS" rmdb "${TMP_PATH}/aln_redundancy"
"$MMSEQS" rmdb "${TMP_PATH}/input_step_redundancy"
STEP=0
while [ "$STEP" -lt "$STEPS" ]; do
"$MMSEQS" rmdb "${TMP_PATH}/pref_step$STEP"
"$MMSEQS" rmdb "${TMP_PATH}/aln_step$STEP"
"$MMSEQS" rmdb "${TMP_PATH}/clu_step$STEP"
STEP=$((STEP+1))
done
STEP=1
while [ "$STEP" -lt "$STEPS" ]; do
"$MMSEQS" rmdb "${TMP_PATH}/input_step$STEP"
rm -f "${TMP_PATH}/order_step$STEP"
STEP=$((STEP+1))
done
rm -f "${TMP_PATH}/cascaded_clustering.sh"
fi
......@@ -9,12 +9,12 @@ notExists() {
[ ! -f "$1" ]
}
# check amount of input variables
# check number of input variables
[ "$#" -ne 3 ] && echo "Please provide <sequenceDB> <outDB> <tmp>" && exit 1;
# check if files exists
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ -f "$2.dbtype" ] && echo "$2 exists already!" && exit 1;
[ ! -d "$3" ] && echo "tmp directory $3 not found!" && mkdir -p "$3";
# check if files exist
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ -f "$2.dbtype" ] && echo "$2.dbtype exists already!" && exit 1;
[ ! -d "$3" ] && echo "tmp directory $3 not found!" && mkdir -p "$3";
INPUT="$1"
TMP_PATH="$3"
......@@ -32,7 +32,8 @@ if notExists "${TMP_PATH}/clu_redundancy.dbtype"; then
fi
if notExists "${TMP_PATH}/input_step_redundancy.dbtype"; then
"$MMSEQS" createsubdb "${TMP_PATH}/clu_redundancy" "$INPUT" "${TMP_PATH}/input_step_redundancy" \
# shellcheck disable=SC2086
"$MMSEQS" createsubdb "${TMP_PATH}/clu_redundancy" "$INPUT" "${TMP_PATH}/input_step_redundancy" ${VERBOSITY} --subdb-mode 1 \
|| fail "MMseqs order step $STEP died"
fi
......
......@@ -8,25 +8,19 @@ notExists() {
[ ! -f "$1" ]
}
# check amount of input variables
# check number of input variables
[ "$#" -ne 2 ] && echo "Please provide <sequenceDB> <tmp>" && exit 1;
# check if files exists
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ ! -d "$2" ] && echo "tmp directory $2 not found!" && mkdir -p "$2";
# check if files exist
[ ! -f "$1.dbtype" ] && echo "$1.dbtype not found!" && exit 1;
[ ! -d "$2" ] && echo "tmp directory $2 not found!" && mkdir -p "$2";
INPUT="$1"
if [ -n "$TRANSLATED" ]; then
# 1. extract orf
if notExists "$2/orfs.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" extractorfs "$INPUT" "$2/orfs" $ORF_PAR \
|| fail "extractorfs died"
fi
if notExists "$2/orfs_aa.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" translatenucs "$2/orfs" "$2/orfs_aa" $TRANSLATE_PAR \
|| fail "translatenucs died"
"$MMSEQS" extractorfs "$INPUT" "$2/orfs_aa" $ORF_PAR \
|| fail "extractorfs died"
fi
# shellcheck disable=SC2086
......@@ -35,7 +29,6 @@ if [ -n "$TRANSLATED" ]; then
if [ -n "$REMOVE_TMP" ]; then
echo "Remove temporary files"
"$MMSEQS" rmdb "$2/orfs"
"$MMSEQS" rmdb "$2/orfs_aa"
rm -f "$2/createindex.sh"
fi
......
......@@ -17,11 +17,9 @@ hasCommand touch
hasCommand tar
TAXDBNAME="$1"
MAPPINGFILE=$2
NCBITAXINFO="$3"
TMP_PATH="${4:-$2}"
TMP_PATH="$2"
if [ "$DOWNLOAD_DATA" -eq "1" ]; then
if [ "$DOWNLOAD_NCBITAXDUMP" -eq "1" ]; then
# Download NCBI taxon information
if notExists "$4/ncbi_download.complete"; then
echo "Download taxdump.tar.gz"
......@@ -30,11 +28,12 @@ if [ "$DOWNLOAD_DATA" -eq "1" ]; then
touch "${TMP_PATH}/ncbi_download.complete"
fi
NCBITAXINFO="${TMP_PATH}"
fi
if [ "$DOWNLOAD_MAPPING" -eq "1" ]; then
# Download the latest UniProt ID mapping to extract taxon identifiers
if notExists "${TMP_PATH}/mapping_download.complete"; then
echo "Download idmapping.dat.gz"
URL="ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz"
URL="ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz"
wget -nv -O - "$URL" | zcat | awk '$2 == "NCBI_TaxID" {print $1"\t"$3 }' > "${TMP_PATH}/taxidmapping"
touch "${TMP_PATH}/mapping_download.complete"
fi
......
......@@ -8,19 +8,10 @@ notExists() {
[ ! -f "$1" ]
}
# check number of input variables
[ "$#" -ne 3 ] && echo "Please provide <sequenceFASTA> <outFile> <tmp>" && exit 1;
[ ! -f "$1" ] && echo "$1 not found!" && exit 1;
[ -f "$2" ] && echo "$2 exists already!" && exit 1;
[ ! -d "$3" ] && echo "tmp directory $3 not found!" && mkdir -p "$3";
INPUT="$1"
RESULTS="$2"
TMP_PATH="$3"
if notExists "${TMP_PATH}/input.dbtype"; then
# shellcheck disable=SC2086
"$MMSEQS" createdb "${INPUT}" "${TMP_PATH}/input" ${CREATEDB_PAR} \
"$MMSEQS" createdb "$@" "${TMP_PATH}/input" ${CREATEDB_PAR} \
|| fail "query createdb died"
fi
......@@ -30,7 +21,6 @@ if notExists "${TMP_PATH}/clu.dbtype"; then
|| fail "Search died"
fi
if notExists "${TMP_PATH}/cluster.tsv"; then
# shellcheck disable=SC2086
"$MMSEQS" createtsv "${TMP_PATH}/input" "${TMP_PATH}/input" "${TMP_PATH}/clu" "${TMP_PATH}/cluster.tsv" ${THREADS_PAR} \
......@@ -64,6 +54,7 @@ mv "${TMP_PATH}/cluster.tsv" "${RESULTS}_cluster.tsv"
if [ -n "${REMOVE_TMP}" ]; then
echo "Removing temporary files"
"$MMSEQS" rmdb "${TMP_PATH}/input"
"$MMSEQS" rmdb "${TMP_PATH}/input_h"
"$MMSEQS" rmdb "${TMP_PATH}/clu_seqs"
"$MMSEQS" rmdb "${TMP_PATH}/clu_rep"
"$MMSEQS" rmdb "${TMP_PATH}/clu"
......