Skip to content
Commits on Source (3)
# Build description for the programs listed in PRGTARGETS.
#
# Every *.c file found below $(LIBDIR) is compiled to an object below
# $(BUILDDIR). Each program is linked from the shared (non-program) objects
# plus its own object and written to $(TARGETDIR)/<name>$(TARGETEXT).
#
# NOTE: the first rule in this file belongs to $(PRGTARGETS), so a plain
# `make` builds only the first program; use `make all` to build all of them.

# Toolchain; the linker driver is the C compiler.
CC?=gcc
LD=${CC}

# Compiler flags; htslib include paths are resolved through pkg-config
# when the recipe runs (the backticks are expanded by the shell).
CFLAGS= -Wall -pedantic -std=c99 -g -O3 -DSORTEDUNMAPPED -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DDBGNFO -DSHOWALIGN -DDBGLEVEL=0 -DPROGNFO -Ilibs -Ilibs/sufarray -Isamtools
CFLAGS += `pkg-config --cflags htslib`
INC := -I include
CTAGS = ctags > tags

# Link libraries; the rpath entry points at the htslib library directory.
LIB = -lm -lpthread -lz -lncurses -L libs -lform -lmenu -L/usr/local/lib/
LIB += `pkg-config --libs htslib`
LIB += "-Wl,-rpath,`pkg-config --variable=libdir htslib`"

PRGTARGETS := segemehl haarz
LIBDIR := libs
BUILDDIR:= build
TARGETDIR := .
TARGETEXT := .x
SRCEXT := c

# The glob is quoted so that the shell passes it to find unchanged instead of
# expanding it against files in the current directory.
SOURCES := $(shell find $(LIBDIR) -type f -name '*.$(SRCEXT)')
# EXCLUDE is optional; when it is unset nothing is filtered out.
SOURCES := $(filter-out $(EXCLUDE), $(SOURCES))
PRGSOURCES := $(patsubst %,$(LIBDIR)/%.c,$(PRGTARGETS))
LIBSOURCES := $(filter-out $(PRGSOURCES), $(SOURCES))
OBJECTS := $(patsubst $(LIBDIR)/%,$(BUILDDIR)/%,$(SOURCES:.$(SRCEXT)=.o))
LIBOBJECTS := $(patsubst $(LIBDIR)/%,$(BUILDDIR)/%,$(LIBSOURCES:.$(SRCEXT)=.o))
# Files actually written by the link step (e.g. ./segemehl.x).
BINARIES := $(patsubst %,$(TARGETDIR)/%$(TARGETEXT),$(PRGTARGETS))

# Link one program from the shared objects plus its own object file.
$(PRGTARGETS): $(OBJECTS)
	@echo "Linking $@";
	$(LD) $(LIBOBJECTS) $(BUILDDIR)/$@.o -o $(TARGETDIR)/$@$(TARGETEXT) $(LIB)

# Compile one source file. The object's own directory is created first so
# that sources in sub-directories of $(LIBDIR) can be built as well.
$(BUILDDIR)/%.o: $(LIBDIR)/%.c
	@echo "Building... library for source $@";
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) $(INC) -c -o $@ $<

all: $(PRGTARGETS)

# Remove the object tree and the linked programs (with and without suffix).
clean:
	@echo " Cleaning...";
	@echo " $(RM) -r $(BUILDDIR) $(PRGTARGETS) $(BINARIES)"; $(RM) -r $(BUILDDIR) $(PRGTARGETS) $(BINARIES)

.PHONY: all clean
# Build description template.
#
# Every *.c file found below $(LIBDIR) is compiled to an object below
# $(BUILDDIR). Each program named in PRGTARGETS is linked from the shared
# (non-program) objects plus its own object and written to
# $(TARGETDIR)/<name>$(TARGETEXT).

# Toolchain; the linker driver is the C compiler.
CC?=gcc
LD=${CC}

# Compiler flags; htslib include paths are resolved through pkg-config
# when the recipe runs (the backticks are expanded by the shell).
CFLAGS= -Wall -pedantic -std=c99 -g -O3 -DSORTEDUNMAPPED -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DDBGNFO -DSHOWALIGN -DDBGLEVEL=0 -DPROGNFO -Ilibs -Ilibs/sufarray -Isamtools
CFLAGS += `pkg-config --cflags htslib`
INC := -I include
CTAGS = ctags > tags

# Link libraries; the rpath entry points at the htslib library directory.
LIB = -lm -lpthread -lz -lncurses -L libs -lform -lmenu -L/usr/local/lib/
LIB += `pkg-config --libs htslib`
LIB += "-Wl,-rpath,`pkg-config --variable=libdir htslib`"

# The next line is a placeholder, presumably substituted with the PRGTARGETS
# definition before this file is used -- TODO confirm which tool does that.
ENTERTHETARGETSHERE
LIBDIR := libs
BUILDDIR:= build
TARGETDIR := .
TARGETEXT := .x
SRCEXT := c

# The glob is quoted so that the shell passes it to find unchanged instead of
# expanding it against files in the current directory.
SOURCES := $(shell find $(LIBDIR) -type f -name '*.$(SRCEXT)')
# EXCLUDE is optional; when it is unset nothing is filtered out.
SOURCES := $(filter-out $(EXCLUDE), $(SOURCES))
PRGSOURCES := $(patsubst %,$(LIBDIR)/%.c,$(PRGTARGETS))
LIBSOURCES := $(filter-out $(PRGSOURCES), $(SOURCES))
OBJECTS := $(patsubst $(LIBDIR)/%,$(BUILDDIR)/%,$(SOURCES:.$(SRCEXT)=.o))
LIBOBJECTS := $(patsubst $(LIBDIR)/%,$(BUILDDIR)/%,$(LIBSOURCES:.$(SRCEXT)=.o))
# Files actually written by the link step (<name>$(TARGETEXT)).
BINARIES := $(patsubst %,$(TARGETDIR)/%$(TARGETEXT),$(PRGTARGETS))

# Link one program from the shared objects plus its own object file.
$(PRGTARGETS): $(OBJECTS)
	@echo "Linking $@";
	$(LD) $(LIBOBJECTS) $(BUILDDIR)/$@.o -o $(TARGETDIR)/$@$(TARGETEXT) $(LIB)

# Compile one source file. The object's own directory is created first so
# that sources in sub-directories of $(LIBDIR) can be built as well.
$(BUILDDIR)/%.o: $(LIBDIR)/%.c
	@echo "Building... library for source $@";
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) $(INC) -c -o $@ $<

all: $(PRGTARGETS)

# Remove the object tree and the linked programs (with and without suffix).
clean:
	@echo " Cleaning...";
	@echo " $(RM) -r $(BUILDDIR) $(PRGTARGETS) $(BINARIES)"; $(RM) -r $(BUILDDIR) $(PRGTARGETS) $(BINARIES)

.PHONY: all clean
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: segemehl
Upstream-Contact: steve@bioinf.uni-leipzig.de
Source: http://www.bioinf.uni-leipzig.de/Software/segemehl/
Files-Excluded: */cscope.out
*.o
*~
*.SUNOS
Files: *
Copyright: 2008-2015 Bioinformatik Leipzig
License: non-commercial
SEGEMEHL is free software for non-commercial use
Copyright: 2008-2018 Bioinformatik Leipzig
License: GPL-3+
Files: debian/*
Copyright: 2016 Andreas Tille <tille@debian.org>
License: to_be_clarified
Ask upstream for free license and choose the same for
the packaging.
Copyright: 2016-2018 Andreas Tille <tille@debian.org>
License: GPL-3+
License: GPL-3+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
.
On Debian systems you can find the full text of the GNU General Public
License at /usr/share/common-licenses/GPL-3+.
version=3
version=4
opts="uversionmangle=s/_/./g,repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
http://www.bioinf.uni-leipzig.de/Software/segemehl/ .*/segemehl_(\d[_.\d]+)\.tar\.gz
# opts="searchmode=plain,uversionmangle=s/_/./g,repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
# Maybe we can use searchmode=plain to fetch "<a href="downloads/segemehl.tar.gz">Download segemehl version 0.3</a>"
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ALIGNMENT_H
#define ALIGNMENT_H
......@@ -16,10 +35,11 @@
*/
#include "basic-types.h"
#include "biofiles.h"
/*
 * Kinds of edit operations that can be recorded in an alignment.
 * The first three enumerators keep their original values (0, 1, 2);
 * the remaining ones are appended after them.
 */
typedef enum
{
  Replacement, Deletion, Insertion, Skipped, Softclip, Match, Mismatch
} Eoptype;
typedef struct
......@@ -39,10 +59,12 @@ typedef struct {
Uint voff;
Multieop *meops;
Uint numofmeops;
unsigned char rmvseq;
unsigned char rmuseq;
} Alignment;
void copyAlignment(Alignment *to, Alignment *from);
void showmultieoplist(FILE *dev, Alignment *al);
void showDynmultieoplist(Alignment* al, int size);
......@@ -55,17 +77,36 @@ void wrapAlignment(Alignment *al);
/*
 * Alignment queries, string renderers and transformations.
 *
 * Each function is declared exactly once. The narrower declarations of
 * countEops (without lmat), cigarstring (without brief) and
 * bl_cigarGetAlignString (cigar only) were dropped because they conflict
 * with the wider signatures below.
 */
Uint getEdist(Alignment *al);
Uint getBisulfiteMismatches(Alignment *al, Uint bisulfite);
Uint getWrongStrandBisulfiteMismatches(Alignment *al, Uint bisulfite);
void countEops(Alignment *al, Uint *mat, Uint *mis, Uint *ins, Uint *del, Uint *lmat);
char * multieopstring(Alignment *al, Uint leftpad, Uint rightpad, unsigned char rev);
Uint getUalignlen(Alignment *al);
Uint getValignlen(Alignment *al);
Uint getValignlenAndSkipped(Alignment *al);
int getSubstringEdist(Alignment *al, Uint u, Uint v);
int getAlignScore(Alignment *al, int *scores, int indel);
char* cigarstring(Alignment *al, Uint leftpad, Uint rightpad, char clipch, unsigned char rev, char brief);
char* mdstring(Alignment *al, unsigned char rev);
Uint bl_cigarGetAlignLen(char *cigar);
char* bl_mdGetDiffString(char *MD);
char* getNTcodekey(void *space);
void getSoftClipScores(Alignment *al, int polyAlen, int *scores, int indel, int *pAscr, int *adscr, int *adlen) ;
char* mdstrings(Alignment **al, Uint noofaligns, unsigned char rev);
char* bl_cigarGetAlignString(char *cigar, uint64_t **, uint64_t **, Uint *nsplits);
Uint* getSplitEdist(Alignment *al, Uint *noofsplits);
Uint* getUPartialAlignlen(Alignment *al, Uint *noofparts);
char* getEopString (Alignment *al);
char isMatch (Alignment *al, Uint i, Uint j);
void showAlignModel(Alignment* al, FILE *dev, gene_t *model);
Alignment* getSubAlignment (Alignment *al, Uint l, Uint r);
Uint bl_alignGet5PrimeV (Alignment *al, char rc);
Uint bl_alignGet3PrimeU (Alignment *al, char rc);
Uint bl_alignGet5PrimeU (Alignment *al, char rc);
void insertMeop(Alignment *al, Eoptype eop, Uint steps);
Alignment* expandAlignment(Alignment *al, Uint* expandpos, Uint *expandlen);
Uint getPartialAlignNumber(Alignment *al);
int getUalignlenNoClip(Alignment *al);
Alignment* bl_getPartialAlignments (Alignment *al, char *vseq, Uint *noofparts);
char* getAlignString(Alignment* al, char lf);
Alignment* reevalAlignment(Alignment *al);
char predictStrand(Alignment *al, char *vseq);
#endif
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ALURUSORT_H
#define ALURUSORT_H
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ANNOTATION_
#define _ANNOTATION_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stringutils.h"
#include "basic-types.h"
#include "charsequence.h"
#include "gzidx.h"
/*
 * Set of offsets for annotation items. A pointer to this struct is taken
 * by bl_annotationmultitrackApplyOffset; the fields correspond by name to
 * the off5/off3/left/right/select arguments of bl_annotationitemApplyOffset
 * (presumably forwarded there -- TODO confirm).
 */
typedef struct {
int64_t dir5prime;
int64_t dir3prime;
int64_t left;
int64_t right;
int8_t select;
} annotationoffs_t;
/*
 * A single annotation item (feature). The struct carries fields shared by
 * all formats followed by GFF-specific, BED-specific, extension and SNP
 * fields, as marked by the section comments below.
 */
typedef struct {
unsigned char type;
Uint trackid;
/*
*
* chromname is the GFF seqname
*
*/
char *chromname;
Uint chromnamelen;
/*
* BED and GFF: 1-offset
* for personalSNP: start with 0-offset
* end base is not part of the feature, i.e. if
* end = 100 the last feature base is 99. See below.
*
*/
uint64_t start;
uint64_t end;
/*
* GFF: name is the feature key
* BED: name is the name of the actual feature
* for personalSNP track: name is the alleles A,C,T,G separated by '/'.
* Leading '-' is indel: insertion if start-end=0
*
* */
char *name;
Uint namelen;
double score;
unsigned char strand;
/* GFF fields */
unsigned char frame;
char *source;
Uint sourcelen;
Uint noofattributes;
char **attributes;
Uint *attributelen;
/* BED fields */
uint64_t thickStart;
uint64_t thickEnd;
Uint *itemRgb;
Uint blockCount;
uint64_t* blockSizes;
uint64_t* blockStarts;
Uint noofovl;
Uint firstovl;
Uint level;
/* extension */
char **blockRefseqs;
char *blockStrands;
/* SNP item */
Uint alleleCount;// number of alleles in name
Uint *alleleFreq;// from comma-separated list of number of observed alleles - 0 if unknown
Uint *alleleScores;// from a comma-separated list - 0 if unknown
} annotationitem_t;
/*
 * Per-chromosome bin index over an annotation track (returned by
 * bl_annotationIndex). sz is presumably the bin width -- TODO confirm.
 */
typedef struct {
uint64_t noofchr;
char **chrname;
Uint *chrnamelen;
Uint sz;
uint64_t *noofbins;
uint64_t **last; // stores the last interval strictly left of the indexed interval [i*sz,i+1*sz]; NOTE(review): presumably meant as [i*sz,(i+1)*sz] -- confirm
} annotationindex_t;
/*
 * One annotation track: a name, a description, the file name and an array
 * of noofitems annotation items. Each string has an accompanying *len field.
 * NOTE(review): init is presumably an initialisation marker (cf. MAGIC_INIT
 * in basic-types.h) -- confirm against bl_annotationtrackInit.
 */
typedef struct {
uint64_t init;
char sorted;
char *trackname;
Uint tracknamelen;
char *description;
Uint descriptionlen;
Uint noofitems;
annotationitem_t *items;
char *filename;
Uint filenamelen;
} annotationtrack_t;
/*
 * Several annotation tracks combined (see bl_annotationtrackJoin): the
 * per-track name, description and file name are kept in arrays, presumably
 * with nooftracks entries each (TODO confirm), and all items share one array.
 */
typedef struct {
uint64_t init;
char sorted;
Uint nooftracks;
char **trackname;
Uint *tracknamelen;
char **description;
Uint *descriptionlen;
char **filename;
Uint *filenamelen;
Uint noofitems;
annotationitem_t *items;
} annotationmultitrack_t;
/*
 * Annotation API. Each function is declared once; the repeated declaration
 * of bl_annotationtrackDestruct was removed.
 */

/* single items */
void bl_annotationitemInit (annotationitem_t *item, unsigned char type);
void bl_annotationitemDestruct (void *space, annotationitem_t *item);
annotationitem_t* bl_annotationitemCopy(annotationitem_t *dest, annotationitem_t *src);
void bl_annotationitemApplyOffset(annotationitem_t *item, int64_t off3, int64_t off5, int64_t left, int64_t right, int8_t select);
void bl_annotationitemDump(FILE *dev, annotationitem_t *item);

/* comparators */
Uint bl_annotationitem_cmp_track (Uint item, void *track, void *elem, void *nfo);
Uint bl_annotationitem_cmp_multitrack (Uint item, void *track, void *elem, void *nfo);
int bl_annotationitem_nostrand_cmp (void const *a, void const *b);
int bl_annotationitem_cmp (void const *a, void const *b);

/* tracks */
void bl_annotationtrackInit (annotationtrack_t *track);
void bl_annotationtrackDestruct (void *space, annotationtrack_t *track);
Uint bl_annotationtrackGetStats (void *space, annotationtrack_t *track);
void bl_annotationtrackAssignTrackLevel(annotationtrack_t *track);
void bl_annotationtrackSetItems(annotationtrack_t* track, annotationitem_t* items, Uint n);
annotationindex_t* bl_annotationIndex(annotationtrack_t *t);
void bl_annotationtrackDumpIndex(annotationindex_t *idx,annotationtrack_t *track);

/* multi-tracks */
void bl_annotationmultitrackInit (annotationmultitrack_t *track);
void bl_annotationmultitrackDestruct (void *space, annotationmultitrack_t *track);
annotationmultitrack_t* bl_annotationtrackJoin(void *space, annotationmultitrack_t *dest, annotationtrack_t *src);
void bl_annotationmultitrackApplyOffset(annotationmultitrack_t *mtrack, annotationoffs_t *off);
#endif
#ifndef BAMITER_H
#define BAMITER_H
/*
*
* bamiter.h
* alignment representation
*
*
*/
#include <stdint.h>
#include <inttypes.h>
#include <pthread.h>
#include "filebuffer.h"
#include "htslib/sam.h"
#include "htslib/faidx.h"
#include "htslib/kstring.h"
#include "htslib/khash.h"
#include "samio.h"
#include "intervaltree.h"
#define BAM_ITR_BUFSZ 1000
/*
 * Array of 64-bit values with two 32-bit counters.
 * NOTE(review): d presumably holds values packed according to the BAM_CS_*
 * bit layout, with n the number of entries -- confirm against the users.
 */
typedef struct{
uint32_t n;
uint32_t c; //massive padding for better alignment
uint64_t *d;
} bam_cs_data_t;
/*
 * Cross-section descriptor: a target id, a begin coordinate, a count n and
 * a pointer to bam_cs_data_t elements.
 * NOTE(review): x presumably has n elements starting at position beg of
 * reference tid; the meaning of prev/next is not visible here -- confirm.
 */
typedef struct{
uint8_t prev;
uint8_t next;
uint32_t tid;
uint32_t beg;
uint32_t n;
bam_cs_data_t *x;
} bam_cs_t;
/*
 * Bit layout of one packed 64-bit cross-section value
 * (lower -> higher bits)
 *  0: 4-bit : char encoding
 *  4: 1-bit : rc
 *  5: 8-bit : nucleotide qual (64)
 * 13: 8-bit : mapping qual (64)
 * 21: 8-bit : mm/nh
 * 29: 2-bit : paired
 * 31: 2-bit : conversion protocol
 * 33: 16-bit: query position
 * 49: 15-bit: number mismatches/nm
 *
 * The fields above occupy bits 0..63, so the disabled RS field (bit 62)
 * would overlap the NM field; its macros therefore stay commented out.
 */

/* Field masks (unshifted), all of type uint64_t. */
#define BAM_CS_NT_MASK ((UINT64_C(1) << 4) - 1)
#define BAM_CS_RC_MASK ((UINT64_C(1) << 1) - 1)
#define BAM_CS_NQ_MASK ((UINT64_C(1) << 8) - 1)
#define BAM_CS_MQ_MASK ((UINT64_C(1) << 8) - 1)
#define BAM_CS_MM_MASK ((UINT64_C(1) << 8) - 1)
#define BAM_CS_PP_MASK ((UINT64_C(1) << 2) - 1)
#define BAM_CS_CP_MASK ((UINT64_C(1) << 2) - 1)
#define BAM_CS_QP_MASK ((UINT64_C(1) << 16) - 1)
#define BAM_CS_NM_MASK ((UINT64_C(1) << 15) - 1)
//#define BAM_CS_RS_MASK (1<<2)-1

/* Position of the lowest bit of each field. */
#define BAM_CS_NT_LBIT 0
#define BAM_CS_RC_LBIT 4
#define BAM_CS_NQ_LBIT 5
#define BAM_CS_MQ_LBIT 13
#define BAM_CS_MM_LBIT 21
#define BAM_CS_PP_LBIT 29
#define BAM_CS_CP_LBIT 31
#define BAM_CS_QP_LBIT 33
#define BAM_CS_NM_LBIT 49
//#define BAM_CS_RS_LBIT 62

/*
 * encoding of common nucleotides
 */
#define BAM_CS_NT_A 0x1
#define BAM_CS_NT_C 0x2
#define BAM_CS_NT_G 0x4
#define BAM_CS_NT_T 0x8
#define BAM_CS_NT_N 0xF
#define BAM_CS_NT_D 0x0

/* Default sizes and limits. */
#define BAM_CS_DEFAULT_REG 1000000
#define BAM_CS_DEFAULT_COVERAGE 30
#define BAM_CS_DEFAULT_MAX_COVERAGE 10000
#define BAM_CS_DEFAULT_CIRCBUF_SZ 10000000

/* Strand selectors. */
#define METHYL_FWD_STRAND 1
#define METHYL_REV_STRAND 2
#define METHYL_BTH_STRAND 0

//char cop = (char) bam_cigar_opchr(cigar[i]);
//fprintf(stderr, "%c", "=ACMGRSVTWYHKDBN"[bam_seqi(seq, qpos+j)]);

/* Field accessors: shift the field down to bit 0, then mask it. */
#define BAM_CS_GET_NT(val) (((val) & BAM_CS_NT_MASK) )
#define BAM_CS_GET_RC(val) (((val) >> BAM_CS_RC_LBIT) & BAM_CS_RC_MASK)
#define BAM_CS_GET_NQ(val) (((val) >> BAM_CS_NQ_LBIT) & BAM_CS_NQ_MASK)
#define BAM_CS_GET_MQ(val) (((val) >> BAM_CS_MQ_LBIT) & BAM_CS_MQ_MASK)
#define BAM_CS_GET_MM(val) (((val) >> BAM_CS_MM_LBIT) & BAM_CS_MM_MASK)
#define BAM_CS_GET_PP(val) (((val) >> BAM_CS_PP_LBIT) & BAM_CS_PP_MASK)
#define BAM_CS_GET_CP(val) (((val) >> BAM_CS_CP_LBIT) & BAM_CS_CP_MASK)
#define BAM_CS_GET_QP(val) (((val) >> BAM_CS_QP_LBIT) & BAM_CS_QP_MASK)
#define BAM_CS_GET_NM(val) (((val) >> BAM_CS_NM_LBIT) & BAM_CS_NM_MASK)
/* Disabled together with BAM_CS_RS_MASK / BAM_CS_RS_LBIT: any use of this
 * accessor would expand to undefined identifiers. */
//#define BAM_CS_GET_RS(val) (((val) >> BAM_CS_RS_LBIT) & BAM_CS_RS_MASK)
/*
 * Master state set up by bl_bamInitMaster: the BAM header, three "last_*"
 * values (target id, begin, length), a threading flag and a mutex pointer.
 * NOTE(review): mtx presumably guards the last_* fields when isthreaded is
 * set -- confirm in the implementation.
 */
typedef struct{
bam_hdr_t *hdr;
uint32_t last_tid;
uint32_t last_beg;
uint32_t last_len;
uint8_t isthreaded;
pthread_mutex_t *mtx;
} bl_bam_methyl_master_t;
/*
 * Per-worker context set up by bl_bamInitWorker: output stream, htslib
 * handles (index, header, input file, FASTA index), an interval forest,
 * flags, a pointer to the master state, mutex pointers and a circular
 * buffer.
 * NOTE(review): by name, devmtx/faimtx/bammtx presumably serialise access
 * to out/fai/in respectively -- confirm in the implementation.
 */
typedef struct {
FILE *out;
hts_idx_t *idx;
bam_hdr_t *hdr;
samFile *in;
faidx_t *fai;
intervalforest_t *forest;
uint8_t uniqueonly;
uint8_t isthreaded;
bl_bam_methyl_master_t *ms;
pthread_mutex_t *devmtx;
pthread_mutex_t *faimtx;
pthread_mutex_t *bammtx;
circbuffer_t *cb;
} bl_bam_methyl_worker_t;
/*
 * Bundle of htslib handles: input file, FASTA index, BAM index and header.
 * Filled by bl_bamLoadInfo and released by bl_bamDestructInfo (see the
 * prototypes in this header).
 */
typedef struct {
samFile *in;
faidx_t *fai;
hts_idx_t *idx;
bam_hdr_t *hdr;
} bam_info_t;
/*
 * Buffer of bam1_t records with 16-bit counters, a threading flag and a
 * mutex pointer.
 * NOTE(review): presumably n is the fill level, i the read cursor and maxn
 * the capacity of d -- confirm in the implementation.
 */
typedef struct {
uint16_t n;
uint16_t i;
uint16_t maxn;
uint8_t next;
bam1_t *d;
uint8_t isthreaded;
pthread_mutex_t *mtx;
} bl_bam1_buffer_t;
/*
 * BAM helper API: set-up and tear-down of the info/master/worker structs,
 * two worker entry points and BAM output helpers.
 * NOTE(review): the two *Worker functions have the void* (*)(void*) shape
 * of a pthread start routine; args is presumably a
 * bl_bam_methyl_worker_t* -- confirm.
 */
bam_info_t* bl_bamLoadInfo(bam_info_t *nfo, char *bamfn, char *fafn);
bl_bam_methyl_master_t* bl_bamInitMaster(bl_bam_methyl_master_t *ms,
bam_hdr_t *hdr, uint8_t isthreaded, pthread_mutex_t *mtx);
bl_bam_methyl_worker_t* bl_bamInitWorker(bl_bam_methyl_worker_t *wk,
bl_bam_methyl_master_t *ms, FILE *out, bam_info_t *nfo, uint8_t uniqueonly,
uint8_t isthreaded, pthread_mutex_t* devmtx, pthread_mutex_t *bammtx,
circbuffer_t *cb, intervalforest_t *forest);
void* bl_bamCrossSectioMethylWorker(void *args);
void* bl_bamMethylStringWorker(void *args);
void bl_bamDestructInfo(bam_info_t *nfo);
void bl_bamPrintBamrec (htsFile *fp, samrec_t *rec, bam_hdr_t *hdr, pthread_mutex_t *mtx) ;
htsFile* bl_bamOpenFile(char *fn);
bam_hdr_t* bl_bamGetHeader (samheader_t *head, Uint binno);
#endif
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BASIC_TYPES_H
#define BASIC_TYPES_H
......@@ -14,6 +33,8 @@
/* Size constants and the basic integer typedefs used across the project. */
#define MAXBUFFERSIZE 10000
#define BASEINC 10000
#define MAX_INT_LENGTH 50
/* NOTE(review): presumably the value stored in "init" fields to mark an
 * initialised struct -- confirm against the users. */
#define MAGIC_INIT 0x0BADF00D
typedef unsigned char Uchar;
typedef unsigned int Uint;
typedef signed long long Lint;
......@@ -69,6 +90,12 @@ typedef struct {
} QuadSint;
/* Four unsigned integers grouped into one value. */
typedef struct {
  Uint a;
  Uint b;
  Uint c;
  Uint d;
} QuadUint;
#endif
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _BEDFILES_
#define _BEDFILES_
/*
*
* bedfiles.h
*
*
* @author Steve Hoffmann, steve@bioinf.uni-leipzig.de
* @company Bioinformatics, University of Leipzig
* @date 10/30/16 17:41:47 CET
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stringutils.h"
#include "basic-types.h"
#include "charsequence.h"
#include "gzidx.h"
#include "biofiles.h"
/*
 * BED input/output: bl_BEDread returns an annotation track for the given
 * file name; bl_BEDwrite takes an item array with its length n and an
 * output stream; bl_annotationitemDump takes a single item and a stream.
 */
annotationtrack_t* bl_BEDread (void *space, char *filename);
void bl_annotationitemDump(FILE *dev, annotationitem_t *item);
void bl_BEDwrite (annotationitem_t *items, Uint n, FILE *dev);
#endif
#ifndef BGZIP_H
#define BGZIP_H
#include <inttypes.h>
#include "gzip.h"
/* BGZIP-specific header information; holds one 32-bit length field. */
typedef struct {
  uint32_t lenCompressedData;
} bgzip_Header ;

/*
 * Returns a bgzip_Header by value. Declared with (void) so that the
 * declaration is a real prototype: in C an empty parameter list leaves the
 * arguments unchecked.
 */
bgzip_Header bgzip_Header_default(void);
/*
 * BGZIP helpers operating on a gzip header plus the BGZIP header struct.
 * NOTE(review): the meaning of the int / int64_t return values (status
 * code, length, error sentinel) is not visible in this header -- confirm
 * in the implementation.
 */
int bgzip_extractBgzHeader(gzip_Header* gzipHeader, bgzip_Header *bgzipHeader);
int64_t bgzip_findLenUncompressedData(FILE* f, gzip_Header* gzipHeader,
bgzip_Header *bgzipHeader);
#endif // BGZIP_H
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _BIOFILES_
#define _BIOFILES_
......@@ -21,11 +40,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stringutils.h"
#include "basic-types.h"
#include "charsequence.h"
#include "zran.h"
#include "gzidx.h"
#include "annotation.h"
#define ID 1
#define IDEND 2
......@@ -81,6 +100,7 @@ typedef struct fasta_s {
unsigned char hasMates;
unsigned char hasIndex;
unsigned char chunkIsActive;
unsigned char checkid;
Uint nooffiles;
Uint *filetotal;
......@@ -96,87 +116,9 @@ typedef struct fasta_s {
struct access **gzindex;
struct access **mategzindex;
} fasta_t;
/*
 * annotationitem_t and annotationtrack_t are declared in annotation.h,
 * which this header includes. They must not be defined a second time here:
 * the definitions differ (field types and members) and would conflict.
 */
typedef struct {
......@@ -279,14 +221,34 @@ void bl_fastaSetMateClip (fasta_t *f, Uint elem, Uint p5, Uint p3);
/*
 * FASTA/FASTQ clipping accessors, annotation readers and gene model
 * helpers.
 *
 * bl_BEDwrite is not declared here: bedfiles.h declares it as
 * (annotationitem_t *items, Uint n, FILE *dev) and includes this header, so
 * a second declaration with a different parameter list would conflict.
 */
void bl_fastaSetClip (fasta_t *f, Uint elem, Uint p5, Uint p3);
int bl_rm(void *space, char *filename);
Uint bl_fastxFindIDIdx (char *id, fasta_t *set);
annotationtrack_t* bl_BEDread (void *space, char *filename);
void bl_annotationtrackDestruct (void *space, annotationtrack_t *track);
annotationtrack_t* bl_GFFread (void *space, char *filename);
Uint bl_annotationitem_cmp_track (Uint item, void *track, void *elem, void *nfo);
int bl_fastxIDcmp (char *a, char *b);
void bl_GFFAddAttribute (void *space, annotationitem_t *item, char *attr, Uint len);
void bl_GFFwrite(char *filename, annotationtrack_t *set);
Uint bl_annotationtrackGetStats (void *space, annotationtrack_t *track);
char* bl_fastaGetLeftClip(fasta_t *f, Uint elem) ;
char* bl_fastaGetRightClip(fasta_t *f, Uint elem) ;
char* bl_fastaGetLeftClipQual(fasta_t *f, Uint elem) ;
char* bl_fastaGetRightClipQual(fasta_t *f, Uint elem) ;
char* bl_fastaGetMateLeftClip(fasta_t *f, Uint elem) ;
char* bl_fastaGetMateRightClip(fasta_t *f, Uint elem) ;
char* bl_fastaGetMateLeftClipQual(fasta_t *f, Uint elem) ;
char* bl_fastaGetMateRightClipQual(fasta_t *f, Uint elem) ;
char* bl_fastaGetMateNoClip(fasta_t *f, Uint elem);
char* bl_fastaGetSequenceNoClip(fasta_t *f, Uint elem);
char* bl_fastaGetQualityNoClip(fasta_t* f, Uint elem);
char* bl_fastaGetMateQualityNoClip(fasta_t *f, Uint elem);
void bl_addExon (gene_t *gene, Uint start, Uint end, char strand, char *refchr);
void bl_dumpGene(gene_t *gene);
gene_t* bl_initGene (char *identifier, char strand);
char bl_isExon(gene_t *gene, Uint pos);
void bl_wrapGene(gene_t *gene);
Uint bl_getExonNumber (gene_t *model);
Uint bl_getExonStart (gene_t *model, Uint id);
Uint bl_getExonEnd (gene_t *model, Uint id);
fasta_t* bl_fastxCopyIndex (void *space, fasta_t *f, Uint offset, Uint size);
fasta_t* bl_fastxCopy (void *space, fasta_t *f, Uint offset, Uint size);
annotationtrack_t* bl_annotationRead (void *space, char *fn) ;
#endif
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BITARRAY_H
#define BITARRAY_H
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BITVECTOR_H
#define BITVECTOR_H
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
*
......@@ -71,5 +90,7 @@ char*
getstringalphabet (void *space, char *string, Uint len, Uint *asize);
/*
 * Backtracking over the bit-vector matrix D into the alignment al.
 * In addition to D, dim, k and l the function takes the subject sequence,
 * the encoding table and the peq bit-vectors.
 * NOTE(review): whether the returned pointer is al itself is not visible
 * in this header -- confirm in the implementation.
 */
Alignment*
bitvectorbacktrack(Alignment *al, bitvector *D, Uint dim, Uint k,
    Uint l,
    char *subject, Uint *enctab, bitvector *peq); //patch
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
*
* brendel.h
*
*
* @author Steve Hoffmann, steve@bioinf.uni-leipzig.de
* @company Bioinformatics, University of Leipzig
* @date 07/01/2016 03:25:21 PM CEST
*
*/
#ifndef BRENDEL_H
#define BRENDEL_H

/* The include guard makes it safe to include this header more than once. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <math.h>
#include "basic-types.h"
#include "alignment.h"
#include "memory.h"
#include "mathematics.h"
#include "iupac.h"
#include "biofiles.h"

/*
 * Spliced alignment by dynamic programming of a read (length m) against a
 * genome sequence (length n); both variants take a gene_t** model argument.
 * NOTE(review): model is presumably an output parameter, and the a/b/l/r
 * arguments of the "opt" variant presumably restrict the computation --
 * confirm in the implementation.
 */
Alignment* splicedaligndp (char *read, unsigned int m, char *genome, unsigned int n, gene_t **model);
Alignment* splicedaligndpopt (char *read, unsigned int m, char *genome, unsigned int n, gene_t **model, Uint a, Uint b, Uint l, Uint r);

/* Conversion of a spliced alignment plus gene model into a mapping, and a
 * check on such a mapping. */
mapping_t* bl_dpsplicealign2map(Alignment *al, gene_t *model, MultiCharSeq *mseq, Uint vpos, Uint vlen, char strand, char *querydesc, char *query, char *qual, Uint ulen, char ismate) ;
char bl_checkSpliceAlign(mapping_t *m);

#endif /* BRENDEL_H */
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INTSEQUENCE_H
#define INTSEQUENCE_H
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
*
......@@ -34,10 +53,12 @@
"\"Stefan Kurtz uses suffix arrays to fix his bike.\" (A. Torda)\0",
"\"Ich hol' jetzt die Hilti!\" (Ein verzweifelter Bauarbeiter)\0",
"\"Kaeff'chen?\" (Lars)\0",
"\"Wir sind hier nicht in Seattle Dirk!\" (Tocotronic)\0"};
"\"Wir sind hier nicht in Seattle Dirk!\" (Tocotronic)\0",
"\"Boooooring!\" (David S.)\0"
};
/* Number of entries in the citation list above; defined exactly once.
 * Must be kept in sync with that list whenever a quote is added. */
unsigned citenumber = 16;
char* citerand() {
Uint r;
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* container.h
* implementation of a simple container for objects of defined size
......
/*
* segemehl - a read aligner
* Copyright (C) 2008-2017 Steve Hoffmann and Christian Otto
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef DEBUG_H
#define DEBUG_H
......