[run]
omit =
tests/*
setup.py
*site-packages/*
[report]
exclude_lines =
pragma: no cover
pragma: no-cover
def __repr__
raise NotImplementedError
if __name__ == .__main__.:
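For illustration, the exclusion patterns above would exempt lines like these from the coverage report (a hypothetical module, not part of Atropos):

    class Widget:
        def __repr__(self):  # excluded: matches 'def __repr__'
            return "Widget()"

        def run(self):
            raise NotImplementedError  # excluded: matches 'raise NotImplementedError'

    if __name__ == "__main__":  # excluded: matches the __main__ pattern
        print(Widget())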
@@ -14,7 +14,6 @@ atropos/**/*.so
doc/_build
*.pyo
.idea/
tests/testtmp
virtualenv
*.egg-info
*.c
@@ -50,4 +49,7 @@ paper/workflow/results
*.ipynb
*.tar
.pytest_cache/
.coverage
coverage.xml
venv/
.eggs/
\ No newline at end of file
@@ -5,32 +5,17 @@ cache:
directories:
- "$HOME/.cache/pip"
python:
- "3.4.5"
- "3.5"
- "3.6"
- "3.7"
- "3.8"
- 3.6
- 3.7
- 3.8
install:
- pip install --upgrade pip wheel
- pip install Cython
- pip install pytest-cov
- pip install coveralls
- make install
script:
- make test
after_success:
- coveralls
- pylint atropos
# push Docker images for full releases
- >
test -n "$TRAVIS_TAG" &&
docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS &&
export REPO=jdidion/atropos &&
docker build -f Dockerfile -t $REPO:$TRAVIS_TAG . &&
docker tag $REPO:$TRAVIS_TAG $REPO:latest &&
docker tag $REPO:$TRAVIS_TAG $REPO:$COMMIT &&
docker tag $REPO:$TRAVIS_TAG $REPO:travis-$TRAVIS_BUILD_NUMBER &&
docker push $REPO
env:
global:
- secure: BQFHrr0r2qmehz3uVcKsEV3G4ollqYoSqyxF02W9OsApiAihry14Gi3yFKU8knZsqCzXPHlYShHahqiVTIU9QqGi8xlYSVJ8P0cV7xqapgO1HJhKd/quiRRpwGOHXf3yCbEJvGq/9PQ0bRkT3+4Khu8GHKLWrCfepT6LPpNYCzAQTEhsRCAO1MhOEM3x0Req82UMp5fndJfAnla+XcbhCsurZCaNoxmfm5i4Jv3tAUozWipL1vy2E65P/whyvz9U19CxaO+hvuFWTwHivMRtTVoF4fInvKWcf7XxgJ0tOFeDQBQWxTLS852kPq1W3uDbg084CwmlM89tt/zhCMNFSs6nOfO1TysN+FGFRvP7dW8bq7ht+Vzj+sFR62DaPJn48KU3X1TBqYm+/k9xMnPe+njNPioMp4aDKarSV8mVm1NGCQhJ6Feg7ebpKtcSeBCzUEUHUd3YM7xoV06OLNi4D+qClYYLPP91Z/c0/yGcT/rLo8cX+PdeDQiQ13lSIfWi00mdUdW/mdYnpwgJ9JPe2eS6/8+G3bXDCazGuFjlNDlkw/jDiG/tC6Wbgr5rUN2xLC6ZJB9LCP3JhtCZLR/IHIEOVAQHk/awqgKIK+8V9JFhKc753unFpMFnuL3KreGiVEjX/6UdChnMt0OdadaCU/a8CCtNxnSSlyJ+rEy++Fc= # DOCKER_EMAIL
- secure: dDu/K3lqfEmHYGiJO7rdp+RUHqhv76c26u3/I/La3Li9VHuF7rEye3/b7D7PLCZsh49V7VuZ6XVsgVN0mfSkFQV/U+51cmRPDx9poj4IPmUP0Nda04BKWuHjJ7izDZby7aWHPZx6psdLaaBnpw+7zEWiPvDo3Q4Qo8o0ZeD4k3BWQWXh+vSz6TlWWKY0gj6ex/sx+EX/3c/6QRBNYl8IApmOgmUyCzkaXq4sBrjoU4Pm69fSlHge/LU44cvM3++HMYkcaI4M6WUOPuwdjeRZ8wRW/uOJZ5xXV0LQQscB/D82z9OwRirmYVS+O/UXD3r440HIWDm8KPpCFhW1+W3tRytZKt0xm/MeAHEJuvLb3XHa4Tto69SOmgxvLs3CDnJraq4Q8x5T07KYatea/1ChAmiS6mqf6bhcDMNyGaanMTmtfVtnBdTEbvl24rAiu5pxfyW4tHGvptwaSlB8PCH/cLjSJZa5XdiPimld2pbL1MKKplG8JgxHJ8nS/RD9GlT95/nGekOOs49w/zzZZ8wMdoBHOqL/Ln3AwidSC/wxCbOGERm7LyT7lCdQBFA75smQUO43Fmrdjf8mlVZtO6QCAAgyjzOyPaCw9Hhf3bDEEreMIi4g9kBsl+ZWd4W3i1rD64dPNDo24THzxWKqtWI5ufTialA5N+iqms4vjXAs7W4= # DOCKER_USER
- secure: r/IVzZCA/wFZlS7nPEbiv7UGpH1e88egGmKLPfOqx1zWy8B5UH+hEjP8nZQo8mTN5vQpWz3FhCLvS3Dg6FMAci+VpUNdjLfbwgPXw5ukioQHaVaX/4+aut4XqWTsWsgcQ+8zL7I7/GR5hL55VnZrX2R7vh6iEPV3942P2dNiaJ4PDx6XKQeSrfEwQGjRxTK4jfaZZDMr8QpWZuVY4KqynfDnY4UTG+udqJaJi2AYbh39JKN1aGpo7QXj1+kXK7T+XOR5YW+a2hC8d2f6INHothjLG5r/SdgZS8Ma3lGuVq+llEiqJnAW6C4ldSvkk9H54R1T5tO0HhBiiOi4bhglxzD0F976nsxbzV6SAKoOAANR+TR5par1BUlnFO2uIrIVvdhvj806M/dz1ZLkMnHSpF+Fs5z+uwZwJIWjw8I2+R/DiZL/fbUx6Smvqw8ZHYlXjXZG1PhSxyyu00EQfFCjPHGtW1N9CcEoexNuXj+qjY/e/rs9kCYaBczLV8X41807BvxSkwyXUH5DUjTMraW/6b7tParp754cw2jGAvSG5ceqZHKqoZXa+OAY7Bck7oP5nu/xNfG1upv2vjgzCaJ5gErOBP4e/4wrBnmcZkAgLVFtz9yNuL0zBaE3ubnoN+8DyHTsXQVO9f/p6x6ODAH4N2MS6PWYWvLssKKh7DrOPVQ= # DOCKER_PASS
- COMMIT=${TRAVIS_COMMIT::8}
# Changes
v1.1.24 (2019.11.27)
* Fix #87 - Python 3.8 compatibility, time.clock removed
v1.1.23 (2019.11.22)
* Set minimum Python version to 3.4.5
* Fixed #86 - no trimming performed for single-end BAM file
v1.1.22 (2019.05.20)
## 2.0.0-alpha.3 (dev)
## 2.0.0-alpha.2 (2020.01.02)
* *Breaking changes:*
* Renamed `--read1_umi` and `--read2-umi` options to `--read1-umi` and `--read2-umi`
* `detect --fasta` option now accepts only a single value, either 'perinput', 'union', or 'both'
* Replace pysam with bamnostic
* Re-enable testing on Python 3.8
* Added '--umi-input' option to obtain UMIs from a separate input file.
* This addresses #66: Support UMIs from a third input file.
* When writing SAM output for reads with UMIs, write the UMIs using the `BC` tag (see the usage sketch after this list).
* Refactored test code to use pytest fixtures and generate coverage metrics
* Upgrade to xphyle 4.2.2
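As an illustration, a hypothetical invocation combining the UMI options above might look like the following (file names invented; the exact option values are assumptions rather than documented usage):

    atropos trim -a AGATCGGAAGAGC --umi-input umis.fq.gz --output-format sam -o out.sam -se reads.fq.gz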
## 2.0.0-alpha.1 (2019.12.25)
* *Breaking changes:*
* Dropped support for python 3.3-3.5 in order to take advantage of many new features in 3.6 (such as type annotations), and to migrate to xphyle for file management. The 1.1.x branch will maintain 3.3-3.5 support and will receive any new bug fixes (but no new features).
* The `--compression` argument has been renamed to `--compression-mode`, to avoid confusion with the new `--compression-format` option (see below).
* The `--format` option has been renamed to `--input-format`, to avoid confusion with the new `--output-format` option.
* The `--stats` option has been renamed to `--metrics`.
* The `--nextseq-trim` option has been renamed to `--twocolor-trim`.
* The `--sra-accession` option has been renamed to `--accession` and now accepts a protocol prefix.
* Dropped the `--discard` alias for `--discard-trimmed`.
* Dropped the `--trimmed-only` alias for `--discard-untrimmed`.
* Merged PR #63: Implementation of UMI support. Thanks @wckdouglas!
* Eliminated the bin/ folder; switched to using entry points for the atropos executable.
* Fix #32: SAM output.
* Fix #36: Progress bars don't increment correctly when batch size > 1 used.
* Moved all file management code to use [xphyle](https://github.com/jdidion/atropos/tree/xphyle)
* Added --compression-format option to override filename-based detection of the compression format, and to enable compressed output to stdout (see the example after this list).
* Added --output-format option to manually specify output format instead of determining the format from the output file name.
* Added --query option to specify a URL for a supported streaming protocol (e.g. htsget).
* Enabled output to stdout by default with single-end and interleaved reads.
* Migrated to setuptools_scm for version management.
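For example, the new options make it possible to stream compressed trimmed reads to stdout (a sketch; the option values are assumptions based on the entries above):

    # single-end output goes to stdout by default; write it as gzip-compressed FASTQ
    atropos trim -a AGATCGGAAGAGC --output-format fastq --compression-format gzip -se reads.fq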
## v1.1.24 (2019.11.17)
* Fix #87 - Python 3.8 incompatibility - change time.clock() to time.process_time()
## v1.1.23 (2019.11.22)
* Set minimum Python version to 3.4.5
* Fixed #86 - no trimming performed for single-end BAM file
## v1.1.22 (2019.05.20)
* Documentation fixes (#79)
* Fix for index error when running `detect` command (#80)
v1.1.21 (2018.11.24)
## v1.1.21 (2018.11.24)
* Added long options for paired adapter parameters.
v1.1.20 (2018.11.21)
## v1.1.20 (2018.11.21)
* Fix #74: Make pysam open SAM/BAM files with check_sq=False by default
* Fix #74: Make pysam open SAM/BAM files with `check_sq=False` by default
* Fixed setup.py to correctly include README for PyPI.
v1.1.19 (2018.05.16)
## v1.1.19 (2018.05.16)
* Fix #68: Error when using insert aligner with adapters of different lengths
v1.1.18 (2018.03.16)
## v1.1.18 (2018.03.16)
* Added two new tools to the benchmarks:
* fastp
@@ -35,78 +72,76 @@ v1.1.18 (2018.03.16)
* Updated versions of several tools used in the paper. Rebuilt containers and pushed them to Dockerhub.
* Fix #64: InsertAligner not respecting match_adapter_wildcards and match_read_wildcards options.
v1.1.17 (2018.01.13)
## v1.1.17 (2018.01.13)
* Fix #51: Reads of different lengths not error corrected.
v1.1.16 (2018.01.07)
## v1.1.16 (2018.01.07)
* Fix for #57: LegacyReport stops on adapter with no trimmed reads, and LegacyReport errors when histogram data is None. Thanks to @cokelaer and @pyMyt1!
* Fix for #58: NextSeqTrimmer not trimming from both ends. Thanks to @pkMyt1!
v1.1.15 (2017.09.28)
## v1.1.15 (2017.09.28)
* Fix for #41: Error when using progress bar.
* Fix for #42: Discordance between Cutadapt and Atropos in number of expected events.
* Added '--alphabet' option. Set to 'dna' to validate input sequences against the allowed DNA characters (A/C/G/T/N). This fixes #43 and partially fixes #44. (See the usage example after this list.)
* Fixed #44: Uncaught errors not being logged.
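For example, DNA-alphabet validation can be enabled as follows (a sketch; file names invented):

    atropos trim --alphabet dna -a AGATCGGAAGAGC -se reads.fq -o trimmed.fq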
v1.1.14 (2017.09.19)
## v1.1.14 (2017.09.19)
* Fix for #39: miRNA option error (thanks to @mottodora)
* Fix for #37: fixes overflow error when computing RandomMatchProbability on long reads (>150 bp)
v1.1.13 (2017.09.14)
## v1.1.13 (2017.09.14)
* Fix for #38: Atropos fails with MultiCore error when using OrderPreservingWriterResultsHandler (thanks to @cshenanigans)
v1.1.12 (2017.08.15)
## v1.1.12 (2017.08.15)
* Expose --min-frequency and --min-contaminant-match-frac options to 'detect -d heuristic' command.
* Expose --min-kmer-match-frac option to 'detect -d known' command.
* Fixed #35: using incorrect metric to determine match fraction in 'detect -d known' command.
v1.1.11 (2017.08.15)
## v1.1.11 (2017.08.15)
* Fixed #34: JSON report output not working with SRA streaming.
v1.1.10 (2017.08.09)
## v1.1.10 (2017.08.09)
* Improve debugging messages
v1.1.9 (2017.08.01)
## v1.1.9 (2017.08.01)
* Fix #30: failure when using --preserve-order option
v1.1.8 (2017.07.10)
## v1.1.8 (2017.07.10)
* Add --config option for specifying options in a config file.
* Fix for #29: allow paired-end quality and N trimming without adapter trimming.
* Removed twine register command from make release
v1.1.7 (2017.06.01)
## v1.1.7 (2017.06.01)
* Stream reads directly from an SRA accession for any atropos command using the
-sra option.
* Add detect option to specify the bases that signify when the sequencer has
read past the end of a fragment.
* Stream reads directly from an SRA accession for any atropos command using the -sra option.
* Add detect option to specify the bases that signify when the sequencer has read past the end of a fragment.
v1.1.6 (2017.05.30)
## v1.1.6 (2017.05.30)
* Add FASTA output for detect command, and enable json, yaml, and pickle output for all commands.
v1.1.5 (2017.05.18)
## v1.1.5 (2017.05.18)
* Major update to the documentation.
* Fixed error messages in multi-threaded mode.
* Fixed bug when generating reports for runs involving error correction.
v1.1.4 (2017.05.02)
## v1.1.4 (2017.05.02)
* Exposed option to set PRNG seed when subsampling reads.
* Fixed issue #14: 'detect' and 'error' commands were broken. This involved rewriting those commands to use the same pipeline and reporting frameworks as the 'trim' and 'qc' commands.
v1.1.3 (2017.05.01)
## v1.1.3 (2017.05.01)
* Updated Dockerfile to use smaller, Alpine-based image.
* Added Docker image for v1.1.2 to Docker Hub.
@@ -115,7 +150,7 @@ v1.1.3 (2017.05.01)
* Fixed #12: tqdm progress bar not working.
* Fixed #13: unnecessary differences in summary output between Cutadapt and Atropos.
v1.1.2 (2017.04.12)
## v1.1.2 (2017.04.12)
* New 'qc' command computes read-level statistics.
* The 'trim' command can also compute read-level statistic pre- and/or post-trimming using the new '--stats' option.
@@ -132,46 +167,45 @@ v1.1.2 (2017.04.12)
* Implemented Atropos module for MultiQC, which reads reports in JSON format. This is currently available [here](http://github.com/jdidion/atropos-multiqc) and will hopefully soon be merged into MultiQC.
* Ported some recent enhancements over from Cutadapt.
v1.0.23 (2016.12.07)
## v1.0.23 (2016.12.07)
* Identified a subtle bug having to do with insufficient memory in multi-threaded mode. The main thread appears to hang waiting for the next read from the input file. This appears to occur only under a strictly-regulated memory cap, such as in a cluster environment. This bug is not fixed, but I added the following:
* Set the default batch size based on the queue sizes
* Warn the user if their combination of batch and queue sizes might lead to excessive memory usage.
* Bug fixes
v1.0.22 (2016.12.02)
## v1.0.22 (2016.12.02)
* Abstracted the ErrorEstimator class to enable alternate implementations.
* Added a new ShadowRegressionErrorEstimator that uses the ShadowRegression R package (Wang et al.) to more accurately estimate sequencing error rate. This requires that R and the [ShadowRegression package](http://bcb.dfci.harvard.edu/~vwang/shadowRegression.html) and its dependencies be installed -- MASS and ReadCount, which in turn depend on a bunch of Bioconductor packages. At some point, this dependency will go away when I reimplement the method in pure python.
* The error command now reports the longest matching read fragment, which is usually a closer match for the actual adapter sequence than the longest matching k-mer.
v1.0.21 (2016.11.23)
## v1.0.21 (2016.11.23)
* Bugfixes
v1.0.20 (2016.11.22)
## v1.0.20 (2016.11.22)
* Changed the order of trimming operations - OverwriteReadModifier is now after read and quality trimming.
* Refactored the main Atropos interface to improve testability.
* Added more unit tests.
v1.0.19 (2016.11.21)
## v1.0.19 (2016.11.21)
* Fixed a major bug in OverwriteReadModifier, and in the unit tests for paired-end trimmers.
v1.0.18 (2016.11.20)
## v1.0.18 (2016.11.20)
* Added OverwriteReadModifier, a paired-end modifier that overwrites one read end with the other if the mean quality over the first N bases (where N is user-specified) of one is below a threshold value and the mean quality of the other is above a second threshold. This dramatically improves the number of high-quality read mappings in data sets where there are systematic problems with one read-end.
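A minimal sketch of the decision rule described above (names and structure invented for illustration; this is not the actual OverwriteReadModifier code):

    def choose_overwrite(quals1, quals2, n, low, high):
        # quals1/quals2: per-base qualities of the two read ends; n: number of
        # leading bases to average; low/high: the two mean-quality thresholds.
        mean1 = sum(quals1[:n]) / n
        mean2 = sum(quals2[:n]) / n
        if mean1 < low and mean2 > high:
            return "overwrite read1 with read2"
        if mean2 < low and mean1 > high:
            return "overwrite read2 with read1"
        return None  # leave both reads unchanged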
v1.0.17 (2016.11.18)
## v1.0.17 (2016.11.18)
* Perform error correction when insert match fails but adapter matches are complementary
* Improvements to handling of cached adapter lists
* Merged reads are no longer written to --too-short-output by default
* Many bugfixes and improvements in deployment (including a Makefile)
v1.0.16 (2016.09.20)
## v1.0.16 (2016.09.20)
* Migrate to Versioneer for version management.
* Enable stderr as a valid output using the '_' shortcut.
@@ -181,13 +215,13 @@ v1.0.16 (2016.09.20)
* We are beginning to move towards the use of commands for all operations, and read-trimming now falls under the 'trim' command. Currently, calling atropos without any command will default to the 'trim' command.
* When InsertAdapterCutter.symmetric is True, mismatch_action is not None, the insert match fails, at least one adapter match succeeds, and the adapter matches (if there are two) are complementary, the reads are treated as overlapping and error correction is performed. This leads to substantial improvements when one read is of good quality while the other is of poor quality.
v1.0.15 (2016.09.14)
## v1.0.15 (2016.09.14)
* Fixed missing import bug in 'detect' command.
* Added estimate of fraction of contaminated reads to output of 'detect' command.
* Optionally cache list of known contaminants rather than re-download it every time.
v1.0.14 (2016.09.13)
## v1.0.14 (2016.09.13)
* Implemented _align.MultiAligner, which returns all matches that satisfy the overlap and error thresholds. align.InsertAligner now uses MultiAligner for insert matching, and tests all matches in decreasing size order until it finds one with adapter matches (if any).
* Major improvements to the accuracy of the 'detect' command.
@@ -198,53 +232,54 @@ v1.0.14 (2016.09.13)
* Fixed a segmentation fault that occurs when trying to trim zero-length reads with the insert aligner.
* Several other bugfixes.
v1.0.13 (2016.08.31)
## v1.0.13 (2016.08.31)
* Add options to specify max error rates for insert and adapter matching within insert aligner.
* Add new command to estimate empirical error rate in data set from base qualities.
v1.0.12 (2016.08.30)
## v1.0.12 (2016.08.30)
* Add ability to correct errors during insert-match adapter trimming.
* Implement additional adapter-detection algorithms.
* Fix bug where default output file is force-created in parallel-write mode
v1.0.11 (2016.08.24)
## v1.0.11 (2016.08.24)
* Clarify and fix issues with bisulfite trimming. Notably, rrbs and non-directional are now allowed independently or in combination.
v1.0.10 (2016.08.23)
## v1.0.10 (2016.08.23)
* Introduced new 'detect' command for automatically detecting adapter sequences.
* Options are now required to specify input files.
* Major updates to documentation.
v1.0.9 (2016.08.22)
## v1.0.9 (2016.08.22)
* Bugfix release
v1.0.8 (2016.08.19)
## v1.0.8 (2016.08.19)
* Reverted the previously introduced (and no longer necessary) dependency on bitarray.
* Switched the insert aligner back to the default implementation, as the one that ignores indels is not any faster.
v1.0.7 (2016.08.18)
## v1.0.7 (2016.08.18)
* Re-engineered modifiers.py (and all dependent code) to enable use of modifiers that simultaneously edit both reads in a pair.
* Add --op-order option to enable the user to specify the order of the first four trimming operations.
* Implemented insert-based alignment for paired-end adapter trimming. This is currently experimental. Benchmarking against SeqPurge and Skewer using simulated reads showed that the method Cutadapt uses to align adapters, while optimal for single-end reads, is much less sensitive and specific than the insert match algorithms used by SeqPurge and Skewer. Our algorithm is similar to the one used by SeqPurge but leverages the dynamic programming model of Cutadapt.
v1.0.6 (2016.08.08)
## v1.0.6 (2016.08.08)
* Based on tests, worker compression is faster than writer compression when more than 8 threads are available, so worker compression is now the default in that case.
v1.0.5 (2016.08.06)
## v1.0.5 (2016.08.06)
* Internal code reorganization - compression code moved to a separate module
* Eliminated the --worker-compression option in favor of --compression (whose value is either 'worker' or 'writer')
* More documentation improvements
v1.0.3 (2016.08.05)
## v1.0.3 (2016.08.05)
* Significant performance improvements:
* Start an extra worker once the main process is finished loading reads
@@ -254,11 +289,11 @@ v1.0.3 (2016.08.05)
* Disable quality trimming if all cutoffs are set to 0
* Eliminated the --parallel-environment option
v1.0.1 (2016.08.04)
## v1.0.1 (2016.08.04)
* Fix documentation bugs associated with migration from optparse to argparse
v1.0 (2016.07.29)
## v1.0 (2016.07.29)
* Initial release (forked from cutadapt 1.10)
* Re-wrote much of filters.py and modifiers.py to separate modifying/filtering from file writing.
......
John P Didion, Marcel Martin, and Francis S Collins. "Atropos: specific, sensitive, and speedy trimming of sequencing reads." In preparation.
Didion JP, Martin M, Collins FS. (2017) Atropos: specific, sensitive, and speedy trimming of sequencing reads. PeerJ 5:e3720 https://doi.org/10.7717/peerj.3720
@ARTICLE{Didion2016Cutadapt,
author = {Didion, John P and Martin, Marcel and Collins, Francis S},
title = {Atropos: specific, sensitive, and speedy trimming of sequencing reads.},
journal = {submitted},
year = 2017
journal = {PeerJ},
volume = {5},
pages = {e3720},
year = {2017},
doi = {10.7717/peerj.3720}
}
@@ -2,7 +2,7 @@
# Dockerfile
#
# Software: Atropos
# Software Version: 1.1.0
# Software Version: 2.0.0
# Description: Atropos image
# Website: https://github.com/jdidion/atropos
# Provides: atropos
@@ -28,7 +28,7 @@ RUN export NPROC=$(grep -c ^processor /proc/cpuinfo 2>/dev/null || 1) \
zlib-dev bzip2-dev xz-dev
# Additional Atropos dependencies
RUN pip install tqdm pysam jinja2
RUN pip install tqdm bamnostic jinja2
# Attach project directory and install
ADD . /atropos/
......
# documentation
include README.rst
include README.md
include CHANGES.md
include CITATION
include LICENSE.txt
@@ -7,15 +7,12 @@ include doc/*.rst
include doc/conf.py
include doc/Makefile
include atropos/**/*.pyx
include atropos/align/_align.c
include atropos/aligner/_aligner.c
include atropos/commands/trim/_qualtrim.c
include atropos/io/_seqio.c
include atropos/io/readers/_readers.c
include atropos/io/sequence/_sequence.c
include atropos/adapters/*.fa
include atropos/report/templates/*
include bin/_preamble.py
include tests/test*.py
include tests/utils.py
graft tests/data
graft tests/cut
include versioneer.py
include atropos/_version.py
graft tests/data/input
graft tests/data/expected
\ No newline at end of file
tests = tests
module = atropos
#pytestops = --full-trace
#pytestops = -v -s
repo = jdidion/$(module)
package = atropos
#pytestopts = -s -vv --show-capture=all
repo = jdidion/$(package)
desc = Release $(version)
BUILD =
TEST =
all: clean install install_extra_requirements install_test_requirements test test_release_setup
all: clean install test
clean:
rm -Rf __pycache__
rm -Rf .pytest_cache
rm -Rf .eggs
rm -Rf **/__pycache__/*
rm -Rf **/*.c
rm -Rf **/*.so
rm -Rf **/*.pyc
rm -Rf dist
rm -Rf build
rm -Rf atropos.egg-info
rm -f .adapters
rm -f .coverage
rm -f coverage.xml
rm -f MANIFEST
build:
python setup.py build_ext -i
python setup.py sdist bdist_wheel
install: clean build
python setup.py install $(installargs)
pip install --upgrade dist/*.whl $(installargs)
install_test_requirements:
pip install -r requirements-test.txt
install_extra_requirements:
pip install -r requirements-extra.txt
test:
py.test $(pytestops) $(tests)
test: install install_extra_requirements install_test_requirements
coverage run -m pytest $(pytestopts) $(tests)
coverage report -m
coverage xml
test_release_setup:
twine check dist/*
docs:
make -C doc html
readme:
pandoc --from=markdown --to=rst --output=README.rst README.md
pandoc --from=markdown --to=rst --output=CHANGES.rst CHANGES.md
lint:
pylint $(module)
pylint $(package)
clean:
rm -Rf __pycache__
rm -Rf **/__pycache__/*
rm -Rf **/*.c
rm -Rf **/*.so
rm -Rf **/*.pyc
rm -Rf dist
rm -Rf build
rm -Rf .adapters
rm -Rf atropos.egg-info
reformat:
black $(package)
black $(tests)
docker:
# build
@@ -47,22 +59,36 @@ docker:
# add alternate tags
docker tag $(repo):$(version) $(repo):latest
# push to Docker Hub
docker login -u jdidion && \
docker push $(repo)
docker login && docker push $(repo)
tag:
git tag $(version)
release: clean tag install test
twine upload dist/*
push_tag:
git push origin --tags
del_tag:
git tag -d $(version)
pypi_release:
twine upload dist/*
release: clean tag
${MAKE} install test pypi_release push_tag || (${MAKE} del_tag && exit 1)
# github release
curl -v -i -X POST \
-H "Content-Type:application/json" \
-H "Authorization: token $(token)" \
https://api.github.com/repos/$(repo)/releases \
-d '{"tag_name":"$(version)","target_commitish": "master","name": "$(version)","body": "$(desc)","draft": false,"prerelease": false}'
-d '{"tag_name": "$(version)", "target_commitish": "master", "name": "$(version)", "body": "$(desc)", "draft": false, "prerelease": false}'
# build a package with the files needed to run the workflows
workflow:
......
[![Travis CI](https://travis-ci.org/jdidion/atropos.svg?branch=1.1)](https://travis-ci.org/jdidion/atropos)
[![Travis CI](https://travis-ci.org/jdidion/atropos.svg?branch=develop)](https://travis-ci.org/jdidion/atropos)
[![PyPi](https://img.shields.io/pypi/v/atropos.svg)](https://pypi.python.org/pypi/atropos)
[![DOI](https://zenodo.org/badge/61393086.svg)](https://zenodo.org/badge/latestdoi/61393086)
[![Coverage Status](https://img.shields.io/coveralls/jdidion/atropos/master.svg)](https://coveralls.io/github/jdidion/atropos?branch=develop)
# Atropos
@@ -11,11 +12,11 @@ Atropos is a tool for specific, sensitive, and speedy trimming of NGS reads. It is
3. Options for trimming specific types of data (miRNA, bisulfite-seq).
4. A new command ('detect') that will detect adapter sequences and other potential contaminants.
5. A new command ('error') that will estimate the sequencing error rate, which helps to select the appropriate adapter- and quality- trimming parameter values.
6. A new command ('qc') that generates read statistics similar to FastQC. The trim command can also compute read statistics both before and after trimming (using the '--stats' option).
6. A new command ('qc') that generates read statistics similar to FastQC. The trim command can also compute read metrics both before and after trimming (using the '--metrics' option).
7. Improved summary reports, including support for serialization formats (JSON, YAML, pickle), support for user-defined templates (via the optional Jinja2 dependency), and integration with [MultiQC](http://multiqc.info).
8. The ability to merge overlapping reads (this is experimental and the functionality is limited).
9. The ability to write the summary report and log messages to separate files.
10. The ability to read SAM/BAM files and read/write interleaved FASTQ files.
10. The ability to read/write SAM, BAM, and interleaved FASTQ files.
11. Direct trimming of reads from an SRA accession.
12. A progress bar, and other minor usability enhancements.
@@ -26,19 +27,21 @@ Atropos is available from [pypi](https://pypi.python.org/pypi/atropos) and can b
First install dependencies:
* Required
* Python 3.4.5+ (python 2.x is NOT supported)
* Cython 0.25.2+ (`pip install Cython`)
* Required python libraries
* Python 3.6+ (python 2.x is NOT supported)
* Cython 0.25.2+ for Python 3.6, 0.29+ for Python 3.7, or 0.29.14+ for Python 3.8 (`pip install Cython`)
* [loguru](https://github.com/Delgan/loguru)
* [pokrok](https://github.com/jdidion/pokrok) 0.2.0+
* [xphyle](https://github.com/jdidion/xphyle) 4.2.1+
* Optional
* pytest (for running unit tests)
* progressbar2 or tqdm (progressbar support)
* pysam (SAM/BAM input)
* bamnostic or pysam (SAM/BAM support)
* khmer 2.0+ (for detecting low-frequency adapter contamination)
* jinja2 (for user-defined report formats)
* ngstream (for SRA streaming), which requires [ngs](https://github.com/ncbi/ngs)
Pip can be used to install atropos and optional dependencies, e.g.:
pip install atropos[tqdm,pysam,ngstream]
`pip install atropos[tqdm,bamnostic,ngstream]`
## Conda
@@ -76,6 +79,10 @@ atropos --aligner insert -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCC
See the [Documentation](https://atropos.readthedocs.org/) for more complete usage information.
## Using Atropos as a library
While we consider the command-line interface to be stable, the internal code organization of Atropos is likely to change. At this time, we recommend not interfacing directly with Atropos as a library (or being prepared for your code to break).
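If you do call Atropos from Python anyway, a minimal sketch using the `atropos.console.run_atropos` entry point (shown elsewhere in this changeset; it takes a `sys.argv`-style argument list and returns an exit code, and the argument values here are invented):

    from atropos.console import run_atropos

    # equivalent to: atropos trim -a AGATCGGAAGAGC -se reads.fq -o trimmed.fq
    retcode = run_atropos(["trim", "-a", "AGATCGGAAGAGC", "-se", "reads.fq", "-o", "trimmed.fq"])
    if retcode != 0:
        raise RuntimeError("atropos exited with code {}".format(retcode))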
## Publications
Atropos is [published](https://peerj.com/articles/3720/) in PeerJ.
@@ -90,7 +97,6 @@ The citation for the original Cutadapt paper is:
> Marcel Martin. "Cutadapt removes adapter sequences from high-throughput sequencing reads." EMBnet.Journal, 17(1):10-12, May 2011. http://dx.doi.org/10.14806/ej.17.1.200
## Links
* [Documentation](https://atropos.readthedocs.org/)
@@ -98,106 +104,3 @@ The citation for the original Cutadapt paper is:
* [Report an issue](https://github.com/jdidion/atropos/issues)
* [Code of conduct](https://github.com/jdidion/atropos/CODE_OF_CONDUCT.md)
* [Contributing](https://github.com/jdidion/atropos/CONTRIBUTING.md)
## Roadmap
### 1.2
* Migrate to [xphyle](https://github.com/jdidion/xphyle) for file management.
* Migrate to [pokrok](https://github.com/jdidion/pokrok) for progress bar management.
* Accept multiple input files.
* Support SAM output (including #33).
* Direct streaming and trimming of reads from SRA and htsget using [ngstream](https://github.com/jdidion/ngstream).
* Read "cropping" (#50)
* Support for ThruPlex-style adapters (in which barcode is part of query sequence; #55)
* Accessibility:
* Create recipe for homebrew.
* Automatically update conda and homebrew recipes for each release.
* Create Galaxy tool description using [argparse2tool](https://github.com/erasche/argparse2tool#cwl-specific-functionality).
* Improve documentation (#24)
* Port over improvements in latest versions of Cutadapt https://cutadapt.readthedocs.io/en/stable/
* Switch to using entry point instead of Atropos executable.
### 1.3
* Add auto-trimming mode for paired-end reads.
* Support for UMIs.
* Provide PacBio- and nanopore-specific options (https://github.com/marcelm/cutadapt/issues/120).
* Provide option for RNA-seq data that will trim polyA sequence.
* Add formal config file support (#53)
* Automate crash reporting using [sentry](https://github.com/getsentry/raven-python).
* Look at [NGmerge](https://github.com/harvardinformatics/NGmerge) for improving read merging.
* Look at replacing pysam with [pybam](https://github.com/JohnLonginotto/pybam)
### 1.4
* Currently, InsertAligner requires a single 3' adapter for each end. Adapter trimming will later be generalized so that A) the InsertAligner can handle multiple matched pairs of adapters and/or B) multiple different aligners can be used for different adapters.
* Integrate with [AdapterBase](https://github.com/NCBI-Hackathons/OnlineAdapterDatabase) for improved matching of detected contaminants to known adapters, automated trimming of datasets with known adapters, and (opt-in) submission of adapter information for novel datasets.
* Migrate to seqio (https://github.com/jdidion/seqio) for reading/writing sequence files.
* Also look at using HTSeq instead: https://github.com/simon-anders/htseq
* General-purpose read filtering based on read ID: https://github.com/marcelm/cutadapt/issues/107.
* Currently, SAM/BAM input files must be name sorted; add an option to 1) pre-sort reads inline using samtools or sambamba, or 2) cache each read in memory until its mate is found.
### 1.5
* Provide more user control over anchoring of adapters: https://github.com/marcelm/cutadapt/issues/53.
* Enable user to define custom read structure: https://github.com/nh13/read-structure-examples
* Support for paired-end demultiplexing
* Demultiplexing based on barcodes: https://github.com/marcelm/cutadapt/issues/118.
* Consider supporting different error rates for read1 vs read2.
* Add a ClipOverlapping modifier that will remove read overlaps (as opposed to merging).
* Look more closely at providing solutions to the Illumina two-color chemistry issue:
* Provide an option to exempt G calls from the assessment of quality
* Trim 3′ Gs from reads
* Also look at addressing any issues with one-color chemistry (iSeq).
* Consider whether to support trimming/QC of raw IonTorrent data.
### 1.6
* Switch to using Click for CLI.
* Implement a public plugin API.
* Add more logging and convert log messages from old-style to new-style format strings.
* Add option to estimate bisulfite conversion rate from filled-in cytosine methylation status in reads that were MspI-digested.
* CPU and memory profiling. Try out:
* https://github.com/nvdv/vprof
* https://github.com/what-studio/profiling
* https://github.com/fabianp/memory_profiler
* https://github.com/rkern/line_profiler#line-profiler
* Look at some new trimming/qc programs; decide whether to add to benchmarks and/or incorporate any of their features
* https://github.com/OpenGene/fastp/issues
* http://tagcleaner.sourceforge.net/
* https://github.com/mdshw5/fastqp/blob/master/README.md
### 2.0
* Simplification of command line options, perhaps by further splitting functionality up into different sub-commands, but also by more intelligent choices for default option values based on context.
* Consider adding additional report formats
* Add fuzzy matching: https://github.com/Daniel-Liu-c0deb0t/Java-Fuzzy-Search
* https://github.com/marcelm/cutadapt/issues/112
* Performance enhancements using
* http://numba.pydata.org/
* https://github.com/undefx/vecpy
* https://github.com/serge-sans-paille/pythran
* https://github.com/IntelLabs/hpat
* https://github.com/cupy/cupy
* >90% test coverage
* Fuzz testing using AFL
* http://lcamtuf.coredump.cx/afl/
* https://github.com/jwilk/python-afl
### Beyond 2.0
* Implement additional alternate alignment algorithms.
* Implement the error detection algorithm in ADEPT: https://github.com/LANL-Bioinformatics/ADEPT
* Explore new quality trimming algorithms
* UrQt: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4450468/
* InfoTrim: github.com/JacobPorter/InfoTrim
* Scythe is an interesting new trimmer. Depending on how the benchmarks look in the forthcoming paper, we will add it to the list of tools we compare against Atropos, and perhaps implement their Bayesian approach for adapter match.
* Experiment with replacing the multicore implementation with an asyncio-based implementation (using ProcessPoolExecutor and uvloop).
* Automatic adaptive tuning of queue sizes to maximize the balance between memory usage and latency.
* FastProNGS has some nice visualizations that could be included, rather than relying on MultiQC: https://github.com/Megagenomics/FastProNGS
While we consider the command-line interface to be stable, the internal code organization of Atropos is likely to change. At this time, we recommend not interfacing directly with Atropos as a library (or being prepared for your code to break). The internal code organization will be stabilized as of version 2.0, which is planned for sometime in 2017.
If you would like to suggest additional enhancements, you can submit issues and/or pull requests at our GitHub page.
# coding: utf-8
"""Top-level Atropos package.
"""
import os
import sys
import pkg_resources
from atropos._version import get_versions
__version__ = get_versions()['version']
del get_versions
class AtroposError(Exception):
"""Base class for Atropos-specific errors.
"""
pass
UNKNOWN_VERSION = "Unknown"
def check_importability(): # pragma: no cover
"""Check that cython modules haev been compile.
"""
try:
import atropos.align._align # pylint: disable=unused-variable
except ImportError as err:
if 'undefined symbol' in str(err):
print("""
ERROR: A required extension module could not be imported because it is
incompatible with your system. A quick fix is to recompile the extension
modules with the following command:
{0} setup.py build_ext -i
See the documentation for alternative ways of installing the program.
The original error message follows.
""".format(sys.executable))
raise
try:
__version__ = pkg_resources.get_distribution(__name__).version
except pkg_resources.DistributionNotFound:
__version__ = UNKNOWN_VERSION
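A quick way to check which branch applied (the installed distribution's version, or "Unknown" when Atropos is not installed as a distribution):

    import atropos
    print(atropos.__version__)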
import sys
from typing import Optional, Sequence
from atropos.console import run_atropos
def main(args: Optional[Sequence[str]] = None):
"""
Main method, designed to be called by the system.
Always calls `sys.exit()`. If you want to run Atropos programmatically, call
`run_atropos()` instead.
Args:
args: Command-line arguments. If None, `sys.argv[1:]` is used.
"""
sys.exit(run_atropos(args))
if __name__ == "__main__":
main()
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.16 (https://github.com/warner/python-versioneer)
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
def get_keywords():
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = " (HEAD -> 1.1, tag: 1.1.24)"
git_full = "9281be92f0e52a14085841344a509f7808efcfe1"
keywords = {"refnames": git_refnames, "full": git_full}
return keywords
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
def get_config():
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
cfg = VersioneerConfig()
cfg.VCS = "git"
cfg.style = "pep440"
cfg.tag_prefix = ""
cfg.parentdir_prefix = "atropos-"
cfg.versionfile_source = "atropos/_version.py"
cfg.verbose = False
return cfg
class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY = {}
HANDLERS = {}
def register_vcs_handler(vcs, method): # decorator
"""Decorator to mark a method as the handler for a particular VCS."""
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
for c in commands:
try:
dispcmd = str([c] + args)
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None))
break
except EnvironmentError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
if verbose:
print("unable to run %s" % dispcmd)
print(e)
return None
else:
if verbose:
print("unable to find command, tried %s" % (commands,))
return None
stdout = p.communicate()[0].strip()
if sys.version_info[0] >= 3:
stdout = stdout.decode()
if p.returncode != 0:
if verbose:
print("unable to run %s (error)" % dispcmd)
return None
return stdout
def versions_from_parentdir(parentdir_prefix, root, verbose):
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes
both the project name and a version string.
"""
dirname = os.path.basename(root)
if not dirname.startswith(parentdir_prefix):
if verbose:
print("guessing rootdir is '%s', but '%s' doesn't start with "
"prefix '%s'" % (root, dirname, parentdir_prefix))
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None}
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords = {}
try:
f = open(versionfile_abs, "r")
for line in f.readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
f.close()
except EnvironmentError:
pass
return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
"""Get version information from git keywords."""
if not keywords:
raise NotThisMethod("no keywords at all, weird")
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
if verbose:
print("discarding '%s', no digits" % ",".join(refs-tags))
if verbose:
print("likely tags: %s" % ",".join(sorted(tags)))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
if verbose:
print("picking %s" % r)
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None
}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
return {"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": "no suitable tags"}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
if not os.path.exists(os.path.join(root, ".git")):
if verbose:
print("no .git in %s" % root)
raise NotThisMethod("no .git directory")
GITS = ["git"]
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
"--always", "--long",
"--match", "%s*" % tag_prefix],
cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
pieces = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
# look for -dirty suffix
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[:git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces
# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
% (full_tag, tag_prefix))
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
# commit: short hex revision ID
pieces["short"] = mo.group(3)
else:
# HEX: no tags
pieces["closest-tag"] = None
count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
cwd=root)
pieces["distance"] = int(count_out) # total number of commits
return pieces
def plus_or_dot(pieces):
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
def render_pep440(pieces):
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_pre(pieces):
"""TAG[.post.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post.devDISTANCE
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += ".post.dev%d" % pieces["distance"]
else:
# exception #1
rendered = "0.post.dev%d" % pieces["distance"]
return rendered
def render_pep440_post(pieces):
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
(a dirty tree will appear "older" than the corresponding clean one),
but you shouldn't be releasing software with -dirty anyways.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
return rendered
def render_pep440_old(pieces):
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
return rendered
def render_git_describe(pieces):
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render_git_describe_long(pieces):
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always --long'.
The distance/hash is unconditional.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"]}
if not style or style == "default":
style = "pep440" # the default
if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
rendered = render_git_describe(pieces)
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
raise ValueError("unknown style '%s'" % style)
return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None}
def get_versions():
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
# py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
# case we can only use expanded keywords.
cfg = get_config()
verbose = cfg.verbose
try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
verbose)
except NotThisMethod:
pass
try:
root = os.path.realpath(__file__)
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree"}
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
return render(pieces, cfg.style)
except NotThisMethod:
pass
try:
if cfg.parentdir_prefix:
return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
except NotThisMethod:
pass
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to compute version"}
# coding: utf-8
"""
Alignment module.
"""
from collections import namedtuple
from atropos.align._align import Aligner, MultiAligner, compare_prefixes, locate
from atropos.util import RandomMatchProbability, reverse_complement
# flags for global alignment
# The interpretation of the first flag is:
# An initial portion of seq1 may be skipped at no cost.
# This is equivalent to saying that in the alignment,
# gaps in the beginning of seq2 are free.
#
# The other flags have an equivalent meaning.
START_WITHIN_SEQ1 = 1
START_WITHIN_SEQ2 = 2
STOP_WITHIN_SEQ1 = 4
STOP_WITHIN_SEQ2 = 8
# Use this to get regular semiglobal alignment
# (all gaps in the beginning or end are free)
SEMIGLOBAL = (
START_WITHIN_SEQ1 | START_WITHIN_SEQ2 |
STOP_WITHIN_SEQ1 | STOP_WITHIN_SEQ2)
def compare_suffixes(
suffix_ref, suffix_query, wildcard_ref=False, wildcard_query=False):
"""Find out whether one string is the suffix of the other one, allowing
mismatches. Used to find an anchored 3' adapter when no indels are allowed.
Args:
suffix_ref, suffix_query: The suffices to compare.
wildcard_ref, wildcard_query: Whether wildcards are valid in either of
the suffices.
"""
suffix_ref = suffix_ref[::-1]
suffix_query = suffix_query[::-1]
_, length, _, _, matches, errors = compare_prefixes(
suffix_ref, suffix_query, wildcard_ref, wildcard_query)
return (
len(suffix_ref) - length, len(suffix_ref), len(suffix_query) - length,
len(suffix_query), matches, errors)
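# Example (illustrative): the returned tuple mirrors compare_prefixes but is
# counted from the 3' ends, laid out as
# (ref_start, ref_stop, query_start, query_stop, matches, errors):
#
#     compare_suffixes("ACGTACGT", "TTTTACGT")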
# Common match-result object returned by aligners
# TODO creating instances of this class is relatively slow and responsible for
# quite some runtime.
class Match(object):
"""An alignment match.
Args:
astart: Starting position of the match within the adapter.
astop: Ending position of the match within the adapter.
rstart: Starting position of the match within the read.
rstop: Ending position of the match within the read.
matches: Number of matching bases.
errors: Number of mismatching bases.
front: Whether the match is to the front of the read.
adapter: The :class:`Adapter`.
read: The :class:`Sequence`.
length: The length of the match.
"""
__slots__ = [
'astart', 'astop', 'rstart', 'rstop', 'matches', 'errors', 'front',
'adapter', 'read', 'length']
def __init__(
self, astart, astop, rstart, rstop, matches, errors,
front=None, adapter=None, read=None):
self.astart = astart
self.astop = astop
self.rstart = rstart
self.rstop = rstop
self.matches = matches
self.errors = errors
self.front = self._guess_is_front() if front is None else front
self.adapter = adapter
self.read = read
# Number of aligned characters in the adapter. If there are indels,
# this may be different from the number of characters in the read.
self.length = self.astop - self.astart
if self.length <= 0:
raise ValueError('Match length must be > 0')
if self.length - self.errors <= 0:
raise ValueError('A Match requires at least one matching position.')
# TODO: this assertion may not always hold now that we use both error
# rates and probabilities
#if self.adapter:
# assert self.errors / self.length <= self.adapter.max_error_rate
def __str__(self):
return (
'Match(astart={0}, astop={1}, rstart={2}, rstop={3}, matches={4}, '
'errors={5})').format(
self.astart, self.astop, self.rstart, self.rstop, self.matches,
self.errors)
def copy(self):
"""Create a copy of this Match.
"""
return Match(
self.astart, self.astop, self.rstart, self.rstop, self.matches,
self.errors, self.front, self.adapter, self.read)
def _guess_is_front(self):
"""Return whether this is guessed to be a front adapter.
The match is assumed to be a front adapter when the first base of
the read is involved in the alignment to the adapter.
"""
return self.rstart == 0
def wildcards(self, wildcard_char='N'):
"""Return a string that contains, for each wildcard character,
the character that it matches. For example, if the adapter
ATNGNA matches ATCGTA, then the string 'CT' is returned.
If there are indels, this is not reliable as the full alignment
is not available.
"""
wildcards = [
self.read.sequence[self.rstart + i]
for i in range(self.length)
if (self.adapter.sequence[self.astart + i] == wildcard_char and
self.rstart + i < len(self.read.sequence))
]
return ''.join(wildcards)
def rest(self):
"""Returns the part of the read before this match if this is a
'front' (5') adapter, or the part after the match if this is not
a 'front' adapter (3'). This can be an empty string.
"""
if self.front:
return self.read.sequence[:self.rstart]
else:
return self.read.sequence[self.rstop:]
# TODO: write test
def get_info_record(self):
"""Returns a :class:`MatchInfo`, which contains information about the
match to write into the info file.
"""
seq = self.read.sequence
qualities = self.read.qualities
if qualities is None:
qualities = ''
rsize = rsize_total = self.rstop - self.rstart
if self.front and self.rstart > 0:
rsize_total = self.rstop
elif not self.front and self.rstop < len(seq):
rsize_total = len(seq) - self.rstart
return MatchInfo(
self.read.name,
self.errors,
self.rstart,
self.rstop,
seq[0:self.rstart],
seq[self.rstart:self.rstop],
seq[self.rstop:],
self.adapter.name,
qualities[0:self.rstart],
qualities[self.rstart:self.rstop],
qualities[self.rstop:],
self.front,
self.astop - self.astart,
rsize, rsize_total)
MatchInfo = namedtuple("MatchInfo", (
"read_name", "errors", "rstart", "rstop", "seq_before", "seq_adapter",
"seq_after", "adapter_name", "qual_before", "qual_adapter", "qual_after",
"is_front", "asize", "rsize_adapter", "rsize_total"))
class InsertAligner(object):
"""Implementation of an insert matching algorithm.
If the inserts align, the overhangs are searched for the adapter sequences.
Otherwise, each read is searched for its adapter separately.
This only works with paired-end reads with 3' adapters.
Args:
adapter1, adapter2: read1, read2 adapters.
match_probability: Callable that calculates random match probability
given arguments (num_matches, match_len).
insert_max_rmp: Max random match probability for the insert match.
adapter_max_rmp: Max random match probability for the adapter match.
min_insert_overlap: Minimum number of bases the inserts must overlap
to be considered matching.
max_insert_mismatch_frac: Maximum fraction of mismatching bases between
the inserts to be considered matching.
min_adapter_overlap: Minimum number of bases the adapter must overlap
the read to be considered matching.
max_adapter_mismatch_frac: Maximum fraction of mismatching bases between
the adapter and the read to be considered matching.
adapter_check_cutoff: Threshold number of matching bases required before
adapter matches are checked against random match probability.
base_probs: Dict of (match_prob, mismatch_prob), which are the
probabilities passed to
:method:`atropos.util.RandomMatchProbability.__call__()`.
"""
def __init__(
self, adapter1, adapter2,
match_probability=RandomMatchProbability(),
insert_max_rmp=1E-6, adapter_max_rmp=0.001,
min_insert_overlap=1, max_insert_mismatch_frac=0.2,
min_adapter_overlap=1, max_adapter_mismatch_frac=0.2,
adapter_check_cutoff=9, base_probs=None,
adapter_wildcards=True, read_wildcards=False):
self.adapter1 = adapter1
self.adapter1_len = len(adapter1)
self.adapter2 = adapter2
self.adapter2_len = len(adapter2)
self.match_probability = match_probability
self.insert_max_rmp = insert_max_rmp
self.adapter_max_rmp = adapter_max_rmp
self.min_insert_overlap = min_insert_overlap
self.max_insert_mismatch_frac = float(max_insert_mismatch_frac)
self.min_adapter_overlap = min_adapter_overlap
self.max_adapter_mismatch_frac = float(max_adapter_mismatch_frac)
self.adapter_check_cutoff = adapter_check_cutoff
self.base_probs = base_probs or dict(
match_prob=0.25, mismatch_prob=0.75)
self.adapter_wildcards = adapter_wildcards
self.read_wildcards = read_wildcards
self.aligner = MultiAligner(
max_insert_mismatch_frac,
START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2,
min_insert_overlap)
def match_insert(self, seq1, seq2):
"""Use cutadapt aligner for insert and adapter matching.
Args:
seq1, seq2: Sequences to match.
Returns:
A :class:`Match` object, or None if there is no match.
"""
seq_len1 = len(seq1)
seq_len2 = len(seq2)
seq_len = min(seq_len1, seq_len2)
if seq_len1 > seq_len2:
seq1 = seq1[:seq_len2]
elif seq_len2 > seq_len1:
seq2 = seq2[:seq_len1]
seq2_rc = reverse_complement(seq2)
def _match(_insert_match, _offset, _insert_match_size, _):
if _offset < self.min_adapter_overlap:
# The reads are mostly overlapping, to the point where
# there's not enough overhang to do a confident adapter
# match. We return just the insert match to signal that
# error correction can be done even though no adapter
# trimming is required.
return (_insert_match, None, None)
# TODO: this is very sensitive to the exact correct choice of
# adapter. For example, if you specify GATCGGAA... and the correct
# adapter is AGATCGGAA..., the prefixes will not match exactly and
# the alignment will fail. We need to use a comparison that is a bit
# more forgiving.
def _adapter_match(insert_seq, adapter_seq, adapter_len):
amatch = compare_prefixes(
insert_seq[_insert_match_size:], adapter_seq,
wildcard_ref=self.adapter_wildcards,
wildcard_query=self.read_wildcards)
alen = min(_offset, adapter_len)
return amatch, alen, round(alen * self.max_adapter_mismatch_frac)
a1_match, a1_length, a1_max_mismatches = _adapter_match(
seq1, self.adapter1, self.adapter1_len)
a2_match, a2_length, a2_max_mismatches = _adapter_match(
seq2, self.adapter2, self.adapter2_len)
if (
a1_match[5] > a1_max_mismatches and
a2_match[5] > a2_max_mismatches):
return None
if min(a1_length, a2_length) > self.adapter_check_cutoff:
a1_prob = self.match_probability(a1_match[4], a1_length)
a2_prob = self.match_probability(a2_match[4], a2_length)
if (a1_prob * a2_prob) > self.adapter_max_rmp:
return None
mismatches = min(a1_match[5], a2_match[5])
def _create_match(alen, slen):
alen = min(alen, slen - _insert_match_size)
_mismatches = min(alen, mismatches)
_matches = alen - _mismatches
return Match(0, alen, _insert_match_size, slen, _matches, _mismatches)
return (
_insert_match,
_create_match(a1_length, seq_len1),
_create_match(a2_length, seq_len2)
)
# # This is the old way of doing things, where we use the built-in
# # Aligner to do a single match.
# aligner = Aligner(
# seq2_rc,
# self.max_insert_mismatch_frac,
# START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2,
# False, False)
# aligner.min_overlap = self.min_insert_overlap
# aligner.indel_cost = 100000
#
# insert_match = aligner.locate(seq1)
#
# if not insert_match:
# return None
#
# offset = min(insert_match[0], seq_len - insert_match[3])
# insert_match_size = seq_len - offset
# prob = self.match_probability(insert_match[4], insert_match_size)
#
# if prob > self.insert_max_rmp:
# return None
#
# return _match(insert_match, offset, insert_match_size, prob)
        # Use an aligner that returns all matches that satisfy the
        # overlap and error rate thresholds. We filter the candidates by
        # random-match probability, sort the remainder, and then check each
        # in turn until we find one with an adapter match (if any).
insert_matches = self.aligner.locate(seq2_rc, seq1)
if insert_matches:
# Filter by random-match probability
filtered_matches = []
for insert_match in insert_matches:
offset = min(insert_match[0], seq_len - insert_match[3])
insert_match_size = seq_len - offset
prob = self.match_probability(insert_match[4], insert_match_size, **self.base_probs)
if prob <= self.insert_max_rmp:
filtered_matches.append((insert_match, offset, insert_match_size, prob))
if filtered_matches:
if len(filtered_matches) == 1:
return _match(*filtered_matches[0])
else:
# Test matches in order of random-match probability.
# TODO: compare against sorting by length (which is how
# SeqPurge essentially does it).
# filtered_matches.sort(key=lambda x: x[2], reverse=True)
filtered_matches.sort(key=lambda x: x[3])
for match_args in filtered_matches:
match = _match(*match_args)
if match:
return match
return None
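# A minimal usage sketch of InsertAligner (hedged: the adapter sequence and
# the read variables below are illustrative, not part of this module):
#
#     matcher = InsertAligner("AGATCGGAAGAGC", "AGATCGGAAGAGC")
#     result = matcher.match_insert(read1_seq, read2_seq)
#     if result:
#         insert_match, adapter_match1, adapter_match2 = result
#
# A None second/third element means the reads overlap almost fully, so error
# correction is possible even though no adapter trimming is required.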
from enum import IntFlag
from typing import Tuple
from ._aligner import Aligner, MultiAligner, compare_prefixes, locate
MatchTuple = Tuple[int, int, int, int, int, int]
class GapRule(IntFlag):
START_WITHIN_SEQ1 = 1
"""Gaps at begining of seq2 have no penalty."""
START_WITHIN_SEQ2 = 2
"""Gaps at begining of seq1 have no penalty."""
STOP_WITHIN_SEQ1 = 4
"""Gaps at end of seq2 have no penalty."""
STOP_WITHIN_SEQ2 = 8
"""Gaps at end of seq1 have no penalty"""
SEMIGLOBAL = (
START_WITHIN_SEQ1
| START_WITHIN_SEQ2
| STOP_WITHIN_SEQ1
| STOP_WITHIN_SEQ2
)
"""Typical semiglobal alignment (all gaps in the beginning or end are free)"""
def compare_suffixes(
suffix_ref: str,
suffix_query: str,
wildcard_ref: bool = False,
wildcard_query: bool = False,
) -> MatchTuple:
"""
Find out whether one string is the suffix of the other one, allowing mismatches.
Used to find an anchored 3' adapter when no indels are allowed.
Args:
        suffix_ref, suffix_query: The suffixes to compare.
        wildcard_ref, wildcard_query: Whether wildcards are valid in either of
            the suffixes.
"""
suffix_ref = suffix_ref[::-1]
suffix_query = suffix_query[::-1]
_, length, _, _, matches, errors = compare_prefixes(
suffix_ref, suffix_query, wildcard_ref, wildcard_query
)
return (
len(suffix_ref) - length,
len(suffix_ref),
len(suffix_query) - length,
len(suffix_query),
matches,
errors,
)
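# Worked example (assumes the compiled compare_prefixes is importable): "SSI"
# is an exact 3-base suffix of "MISSI". Reversed, "ISS" is compared against
# "ISSIM", giving 3 matches and 0 errors over the 3-base overlap, so
# compare_suffixes("SSI", "MISSI") returns (0, 3, 2, 5, 3, 0).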
# cython: profile=False, emit_code_comments=False
# TODO: Flouri et al. report errors in Gotoh's original paper that persist
# in most implementations of NW alignment
# (http://biorxiv.org/content/biorxiv/early/2015/11/12/031500.full.pdf).
# They provide a correct implementation
# (qalign: http://www.exelixis-lab.org/web/software/alignment/).
from cpython.mem cimport PyMem_Free, PyMem_Realloc
from cpython.array cimport array
cdef array ld_array = array('d', [])
from libc.math cimport ceil
DEF START_WITHIN_SEQ1 = 1
DEF START_WITHIN_SEQ2 = 2
......@@ -18,8 +19,11 @@ DEF SEMIGLOBAL = 15
# structure for a DP matrix entry
ctypedef struct _Entry:
int cost
    # no. of matches in this alignment
    int matches
    # where the alignment originated: negative for positions within seq1, positive for
    # positions within seq2
    int origin
ctypedef struct _Match:
int origin
......@@ -112,11 +116,11 @@ class DPMatrix:
"""
Return a representation of the matrix as a string.
"""
        rows = [" " + " ".join(c.rjust(2) for c in self.query)]
        for c, row in zip(" " + self.reference, self._rows):
            r = c + " " + " ".join("  " if v is None else f"{v:2d}" for v in row)
            rows.append(r)
        return "\n".join(rows)
cdef class Aligner:
"""
......@@ -125,8 +129,8 @@ cdef class Aligner:
Locate one string within another by computing an optimal semiglobal
alignment between string1 and string2.
    The alignment uses unit costs, which means that mismatches, insertions and deletions
    are counted as one error.
flags is a bitwise 'or' of the allowed flags.
To allow skipping of a prefix of string1 at no cost, set the
......@@ -143,7 +147,8 @@ cdef class Aligner:
The skipped parts are described with two intervals (start1, stop1),
(start2, stop2).
    For example, an optimal semiglobal alignment of SISSI and MISSISSIPPI looks like
    this:
---SISSI---
MISSISSIPPI
......@@ -167,9 +172,9 @@ cdef class Aligner:
leftmost position within the read.
The alignment itself is not returned, only the tuple
(start1, stop1, start2, stop2, matches, errors), where the first four fields have
the meaning as described, matches is the number of matches and errors is the
number of errors in the alignment.
It is always the case that at least one of start1 and start2 is zero.
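    For example (an illustrative sketch; ``aligner`` and ``query`` are
    hypothetical names)::

        start1, stop1, start2, stop2, matches, errors = aligner.locate(query)
        # reference[start1:stop1] is aligned to query[start2:stop2], with
        # `matches` matching bases and `errors` unit-cost errors.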
......@@ -194,8 +199,16 @@ cdef class Aligner:
cdef bytes _reference # TODO rename to translated_reference or so
cdef str str_reference
def __cinit__(
self,
str reference,
double max_error_rate,
int flags=SEMIGLOBAL,
bint wildcard_ref=False,
bint wildcard_query=False,
int min_overlap=1,
int indel_cost=1
):
self.max_error_rate = max_error_rate
self.flags = flags
self.wildcard_ref = wildcard_ref
......@@ -232,7 +245,9 @@ cdef class Aligner:
return self._reference
def __set__(self, str reference):
mem = <_Entry*> PyMem_Realloc(
self.column, (len(reference) + 1) * sizeof(_Entry)
)
if not mem:
raise MemoryError()
self.column = mem
......@@ -461,7 +476,14 @@ cdef class Aligner:
length = i + min(column[i].origin, 0)
cost = column[i].cost
matches = column[i].matches
if (
length >= self._min_overlap and
cost <= length * max_error_rate and (
matches > best.matches or (
matches == best.matches and cost < best.cost
)
)
):
# update best
best.matches = matches
best.cost = cost
......@@ -484,17 +506,27 @@ cdef class Aligner:
start2 = 0
assert best.ref_stop - start1 > 0 # Do not return empty alignments.
return start1, best.ref_stop, start2, best.query_stop, best.matches, best.cost
def __dealloc__(self):
PyMem_Free(self.column)
def locate(
str reference,
str query,
double max_error_rate,
int flags=SEMIGLOBAL,
bint wildcard_ref=False,
bint wildcard_query=False,
int min_overlap=1
):
aligner = Aligner(reference, max_error_rate, flags, wildcard_ref, wildcard_query)
aligner.min_overlap = min_overlap
return aligner.locate(query)
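# A hypothetical one-off call through the wrapper above (the adapter sequence
# is just an example; a falsy result means no alignment met the thresholds):
#
#     result = locate("AGATCGGAAGAGC", read_seq, 0.1, min_overlap=3)
#     if result:
#         start1, stop1, start2, stop2, matches, errors = result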
def compare_prefixes(
str ref, str query, bint wildcard_ref=False, bint wildcard_query=False
):
"""
Find out whether one string is the prefix of the other one, allowing
IUPAC wildcards in ref and/or query if the appropriate flag is set.
......@@ -537,7 +569,7 @@ def compare_prefixes(str ref, str query, bint wildcard_ref=False, bint wildcard_
matches += 1
# length - matches = no. of errors
return 0, length, 0, length, matches, length - matches
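# Worked example: comparing "AGAT" against "AGCTC" scans min(4, 5) = 4 leading
# bases, of which 3 match (the A/C pair mismatches), so the result is
# (0, 4, 0, 4, 3, 1).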
DEF OVERHANG_MULTIPLIER = 100000
......@@ -562,6 +594,11 @@ cdef class MultiAligner:
self._num_cols = 0
self._num_matches = 0
def __repr__(self):
return f"MultiAligner<max_error_rate={self.max_error_rate}, " \
f"flags={self.flags}, min_overlap={self._min_overlap}, " \
f"_num_cols={self._num_cols}, _num_matches={self._num_matches}>"
def _resize_matrix(self, size):
if size > self._num_cols:
mem = <_Entry*> PyMem_Realloc(self.column, (size + 1) * sizeof(_Entry))
......@@ -741,7 +778,10 @@ cdef class MultiAligner:
continue
length = i + min(column[i].origin, 0)
if (
length >= self._min_overlap and
cost <= length * max_error_rate
):
# update best
match_array[num_matches].ref_stop = i
match_array[num_matches].query_stop = n
......@@ -768,7 +808,14 @@ cdef class MultiAligner:
start1 = -_match.origin
start2 = 0
assert _match.ref_stop - start1 > 0 # Do not return empty alignments.
return (
start1,
_match.ref_stop,
start2,
_match.query_stop,
_match.matches,
_match.cost
)
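    # Usage sketch (names are illustrative): unlike Aligner.locate, this
    # returns every candidate alignment that meets the min_overlap and
    # max_error_rate thresholds, which is how InsertAligner consumes it:
    #
    #     multi = MultiAligner(0.2, START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2, 1)
    #     for start1, stop1, start2, stop2, matches, cost in multi.locate(ref, query):
    #         ...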
def __dealloc__(self):
PyMem_Free(self.column)