Skip to content
Commits on Source (7)
......@@ -15,17 +15,21 @@ addons:
packages:
clang
# NOTE: Testing of the Python binding is currently disabled on OSX because the OSX build environment
# does not support building Python, so I would have to provide the tools myself. I could probably install
# the correct pip (and maybe Python) version myself (homebrew?), but I haven't gotten to that yet.
install:
- if [ $TRAVIS_OS_NAME == "linux" ]; then sudo apt-get install valgrind; fi
- sudo -H pip install cython # Needed to build Python module.
- if [ $TRAVIS_OS_NAME == "linux" ]; then sudo -H pip install cython; fi # Needed to build Python module.
before_script:
# Build C/C++ library and apps.
- mkdir -p build && cd build && cmake .. && make && cd ..
# Build Python source distribution and install Edlib from it.
- cd bindings/python && make sdist && cd ../..
- sudo -H pip install bindings/python/dist/edlib*.tar.gz
- if [ $TRAVIS_OS_NAME == "linux" ]; then cd bindings/python && make sdist && cd ../..; fi
- if [ $TRAVIS_OS_NAME == "linux" ]; then sudo -H pip install bindings/python/dist/edlib*.tar.gz; fi
script:
# Test C/C++ library.
......@@ -37,4 +41,4 @@ script:
- if [ $TRAVIS_OS_NAME == "linux" ]; then valgrind --quiet --error-exitcode=2 --tool=memcheck --leak-check=full build/bin/runTests 2; fi
# Test Python module.
- sudo -H python bindings/python/test.py
- if [ $TRAVIS_OS_NAME == "linux" ]; then sudo -H python bindings/python/test.py; fi
FROM alpine
MAINTAINER Martin Sosic <sosic.martin@gmail.com>
WORKDIR ~
# Install the packages needed to compile Edlib, then pull it from the git repo, compile it,
# copy edlib-aligner to /bin so it can be executed, delete the Edlib source files and delete all packages
# except for libstdc++ (which is needed for edlib-aligner to run) and bash (for convenience), in order to keep the docker image small.
RUN apk --no-cache add cmake clang clang-dev make gcc g++ libc-dev linux-headers git libstdc++ bash && \
git clone -b v1.2.3 https://github.com/Martinsos/edlib.git edlib-git && \
cd edlib-git && cd build && cmake -D CMAKE_BUILD_TYPE=Release .. && make edlib-aligner && \
cp bin/edlib-aligner /bin && \
cd ../.. && rm -r edlib-git && \
apk del cmake clang clang-dev make gcc g++ libc-dev linux-headers git
CMD ["edlib-aligner"]
Edlib
=====
&middot;
[![Latest Github release](https://img.shields.io/github/release/Martinsos/edlib.svg)](https://github.com/Martinsos/edlib/releases/latest)
[![Build status of the master branch on Linux/OSX](https://img.shields.io/travis/Martinsos/edlib/master.svg?label=Linux%20%2F%20OSX%20build)](https://travis-ci.org/Martinsos/edlib)
[![Build status of the master branch on Windows](https://img.shields.io/appveyor/ci/Martinsos/edlib.svg?label=Windows%20build)](https://ci.appveyor.com/project/Martinsos/edlib/branch/master)
[![Chat on Gitter](https://img.shields.io/gitter/room/Martinsos/edlib.svg?colorB=753a88)](https://gitter.im/Martinsos/edlib)
[![Published in Bioinformatics](https://img.shields.io/badge/Published%20in-Bioinformatics-167DA4.svg)](https://doi.org/10.1093/bioinformatics/btw753)
=====
A lightweight and super fast C/C++ library for sequence alignment using [edit distance](https://en.wikipedia.org/wiki/Edit_distance).
......
......@@ -39,13 +39,14 @@ int main(int argc, char * const argv[]) {
bool findStartLocations = false;
int option;
int kArg = -1;
int numRepeats = 1;
// If "CIG_STD" or "CIG_EXT", a cigar string will be printed. If "NICE", a nice representation
// of the alignment will be printed.
char alignmentFormat[16] = "NICE";
bool invalidOption = false;
while ((option = getopt(argc, argv, "m:n:k:f:spl")) >= 0) {
while ((option = getopt(argc, argv, "m:n:k:f:r:spl")) >= 0) {
switch (option) {
case 'm': strcpy(mode, optarg); break;
case 'n': numBestSeqs = atoi(optarg); break;
......@@ -54,6 +55,7 @@ int main(int argc, char * const argv[]) {
case 's': silent = true; break;
case 'p': findAlignment = true; break;
case 'l': findStartLocations = true; break;
case 'r': numRepeats = atoi(optarg); break;
default: invalidOption = true;
}
}
......@@ -77,6 +79,9 @@ int main(int argc, char * const argv[]) {
fprintf(stderr, "\t-f NICE|CIG_STD|CIG_EXT Format that will be used to print alignment path,"
" can be used only with -p. NICE will give visually attractive format, CIG_STD will "
" give standard cigar format and CIG_EXT will give extended cigar format. [default: NICE]\n");
fprintf(stderr, "\t-r N Core part of calculation will be repeated N times."
" This is useful only for performance measurement, when single execution is too short to measure."
" [default: 1]\n");
return 1;
}
//-------------------------------------------------------------------------//
......@@ -157,9 +162,15 @@ int main(int argc, char * const argv[]) {
for (int i = 0; i < numQueries; i++) {
char* query = (*querySequences)[i].data();
int queryLength = (*querySequences)[i].size();
// Calculate score
EdlibAlignResult result = edlibAlign(query, queryLength, target, targetLength,
EdlibAlignResult result;
for (int rep = 0; rep < numRepeats; rep++) { // Redundant repetition, for performance measurements.
result = edlibAlign(query, queryLength, target, targetLength,
edlibNewAlignConfig(k, modeCode, alignTask, NULL, 0));
if (rep < numRepeats - 1) edlibFreeAlignResult(result);
}
scores[i] = result.editDistance;
endLocations[i] = result.endLocations;
startLocations[i] = result.startLocations;
......
cdef extern from "edlib.h":
cdef extern from "edlib.h" nogil:
ctypedef enum EdlibAlignMode: EDLIB_MODE_NW, EDLIB_MODE_SHW, EDLIB_MODE_HW
ctypedef enum EdlibAlignTask: EDLIB_TASK_DISTANCE, EDLIB_TASK_LOC, EDLIB_TASK_PATH
......
cimport cython
from libc.stdlib cimport malloc, free
from cpython.mem cimport PyMem_Malloc, PyMem_Free
cimport cedlib
def align(query, target, mode="NW", task="distance", k=-1, additionalEqualities=None):
""" Align query with target using edit distance.
@param {string} query
@param {string} target
@param {string or bytes array} query
@param {string or bytes array} target
@param {string} mode Optional. Alignment method to be used. Possible values are:
- 'NW' for global (default)
- 'HW' for infix
......@@ -35,9 +35,9 @@ def align(query, target, mode="NW", task="distance", k=-1, additionalEqualities=
e.g. cigar of "5=1X1=1I" means "5 matches, 1 mismatch, 1 match, 1 insertion (to target)".
"""
# Transform python strings into c strings.
cdef bytes query_bytes = query.encode();
cdef bytes query_bytes = query.encode('utf-8') if type(query) != bytes else query;
cdef char* cquery = query_bytes;
cdef bytes target_bytes = target.encode();
cdef bytes target_bytes = target.encode('utf-8') if type(target) != bytes else target;
cdef char* ctarget = target_bytes;
# Build an edlib config object based on given parameters.
......@@ -59,21 +59,19 @@ def align(query, target, mode="NW", task="distance", k=-1, additionalEqualities=
cconfig.additionalEqualities = NULL
cconfig.additionalEqualitiesLength = 0
else:
cconfig.additionalEqualities = <cedlib.EdlibEqualityPair*> malloc(len(additionalEqualities)
cconfig.additionalEqualities = <cedlib.EdlibEqualityPair*> PyMem_Malloc(len(additionalEqualities)
* cython.sizeof(cedlib.EdlibEqualityPair))
for i in range(len(additionalEqualities)):
# TODO(martin): Is there a better way to do this conversion? There must be.
tmp_bytes = additionalEqualities[i][0].encode();
tmp_cstring = tmp_bytes;
cconfig.additionalEqualities[i].first = tmp_cstring[0]
tmp_bytes = additionalEqualities[i][1].encode();
tmp_cstring = tmp_bytes;
cconfig.additionalEqualities[i].second = tmp_cstring[0]
cconfig.additionalEqualities[i].first = bytearray(additionalEqualities[i][0].encode('utf-8'))[0]
cconfig.additionalEqualities[i].second = bytearray(additionalEqualities[i][1].encode('utf-8'))[0]
cconfig.additionalEqualitiesLength = len(additionalEqualities)
# Run alignment.
cresult = cedlib.edlibAlign(cquery, len(query), ctarget, len(target), cconfig)
if cconfig.additionalEqualities != NULL: free(cconfig.additionalEqualities)
# Run alignment -- need to get len before disabling the GIL
query_len = len(query)
target_len = len(target)
with nogil:
cresult = cedlib.edlibAlign(cquery, query_len, ctarget, target_len, cconfig)
if cconfig.additionalEqualities != NULL: PyMem_Free(cconfig.additionalEqualities)
if cresult.status == 1:
raise Exception("There was an error.")
......
......@@ -27,7 +27,7 @@ setup(
name = "edlib",
description = "Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance.",
long_description = long_description,
version = "1.2.2",
version = "1.2.3-1",
url = "https://github.com/Martinsos/edlib",
author = "Martin Sosic",
author_email = "sosic.martin@gmail.com",
......
......@@ -7,6 +7,10 @@ result = edlib.align("telephone", "elephant")
if not (result and result["editDistance"] == 3):
testFailed = True
result = edlib.align(b"telephone", b"elephant")
if not (result and result["editDistance"] == 3):
testFailed = True
result = edlib.align("ACTG", "CACTRT", mode="HW", task="path", additionalEqualities=[("R", "A"), ("R", "G")])
if not (result and result["editDistance"] == 0):
testFailed = True
......
libedlib (1.2.4-1) unstable; urgency=medium
* New upstream version
* debhelper 12
* Standards-Version: 4.3.0
-- Andreas Tille <tille@debian.org> Mon, 28 Jan 2019 19:11:07 +0100
libedlib (1.2.3-5) unstable; urgency=medium
* Declare characters signed explicitly
......
......@@ -3,7 +3,7 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
Build-Depends: debhelper (>= 12~),
cmake,
dh-python,
d-shlibs,
......@@ -11,7 +11,7 @@ Build-Depends: debhelper (>= 11~),
cython3,
python3-all-dev,
python3-setuptools
Standards-Version: 4.2.1
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/libedlib
Vcs-Git: https://salsa.debian.org/med-team/libedlib.git
Homepage: https://github.com/Martinsos/edlib
......
soversion.patch
do_not_build_hello_example.patch
cython3.patch
signed_char.patch
# signed_char.patch
......@@ -2,6 +2,7 @@
#include <ctime>
#include <cstdlib>
#include <cstring>
#include <climits>
#include "edlib.h"
#include "SimpleEditDistance.h"
......@@ -412,8 +413,10 @@ bool test10() {
bool test11() {
int queryLength = 8;
int targetLength = 8;
char query[8] = {-127, 127, -55, 0, 42, 0, 127, -55};
char target[8] = {-127, 127, 0, 42, 0, -55, 127, 42};
// NOTE(Martin): I am using CHAR_MIN and CHAR_MAX because the signedness of the 'char' type is
// implementation-defined, so we can't know whether it is signed or unsigned.
char query[8] = {CHAR_MIN, CHAR_MIN + (CHAR_MAX - CHAR_MIN) / 2, CHAR_MAX};
char target[8] = {CHAR_MIN, CHAR_MIN + (CHAR_MAX - CHAR_MIN) / 2 + 1, CHAR_MAX};
bool r = executeTest(query, queryLength, target, targetLength, EDLIB_MODE_HW);
r = r && executeTest(query, queryLength, target, targetLength, EDLIB_MODE_NW);
......
>e_coli_DH1_illumina_1x100.fasta.000000000
CGTAATAAGGTTCATTGAGCGCAAATGGTGACGTCTTAATAAACGTGGAGATAAACCGACAATATTGATGCTCGCTGCGAAGTTTTTCCGCCGCCCGGGC
>e_coli_DH1_illumina_1x100.fasta.000000000
CGTGATATGGTACCATAATGCGCATCGCTGCGCTTGGTGTCCTGGTAGCGTGGAATAACCGAAGTATGGACCGTTTGCTG
CCGAAATCTTCCCTCGCGTC
>e_coli_DH1_illumina_1x10000.fasta.000000000
TGTAATGAGATTGATCGAGCGGAAATAGGTACTGTCTTAATAAACATGGAGATACGACGATGTTAGATCACTCGCTGCGA
TCTTATCCGCCGCCCGGGCA
>e_coli_DH1_illumina_1x10000.fasta.000000000
CGCAGTCATTCAATTGGCGCAAATGGTGACTCTTAACATAAACTGGTGAAGATAACCGACAACATTATGGCTCGCATGCG
AAGTTGTTACCGGGCATGCA
>e_coli_DH1_illumina_1x10000.fasta.000000000
CGTAATAAGGTTCATAGCGCAAATGGTGACGTCTTAATAAACGTGGAGATAAACCGACAATATCTTGATGCCGCGCGAAG
TTTTTCCGCCGCCCGGGCAG
>e_coli_DH1_illumina_1x10000.fasta.000000000
CGTAATAAGGTTCACTGAGAGCAAATGGTGACTCTTAATAAAGCGTGGAGATAAGCCGACAATATTGATGCTCGCTCGAA
GTTTTTCCGCTGCCCGGGCA
>e_coli_DH1_illumina_1x10000.fasta.000000000
CGTAATAAGGTTCATTGAGCGCAAATGGTGACGTCTTATAAGTGGAGATAAACCGACAATATTGATGCTCGCTGCGAAGT
TTTTCCGCCGCCCGGGCAGC