Skip to content
Commits on Source (9)
subread (1.6.2+dfsg-1) unstable; urgency=medium
* New upstream version 1.6.2+dfsg
* d/control:
- bump Policy to 4.1.4
- update uploader's email in d/{control,copyright}
- bump compat to 11
* Remove --parallel from dh invocation in d/rules
* Generate man page for flattenGTF with help2man
* Install new binary flattenGTF
* Refresh and update patches
-- Alexandre Mestiashvili <mestia@debian.org> Fri, 18 May 2018 19:07:58 +0200
subread (1.6.1+dfsg-1) unstable; urgency=medium
[ Steffen Möller ]
......
Source: subread
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Alexandre Mestiashvili <alex@biotec.tu-dresden.de>,
Uploaders: Alexandre Mestiashvili <mestia@debian.org>,
Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: bc,
debhelper (>= 10),
debhelper (>= 11),
help2man,
python,
zlib1g-dev
Standards-Version: 4.1.3
Standards-Version: 4.1.4
Vcs-Browser: https://salsa.debian.org/med-team/subread
Vcs-Git: https://salsa.debian.org/med-team/subread.git
Homepage: http://sourceforge.net/projects/subread/
......
......@@ -8,7 +8,7 @@ Copyright: 2011-2018, Dr Yang Liao, Dr Wei Shi
License: GPL-3.0+
Files: debian/*
Copyright: 2018 Alexandre Mestiashvili <alex@biotec.tu-dresden.de>
Copyright: 2018 Alexandre Mestiashvili <mestia@debian.org>
License: GPL-3.0+
License: GPL-3.0+
......
......@@ -9,17 +9,16 @@ Description: Inject architecture specific flags:
Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/Makefile.Linux
+++ subread/src/Makefile.Linux
@@ -3,10 +3,10 @@
@@ -7,9 +7,9 @@
include makefile.version
-include ~/.R/DBPZ_debug_makefile
OPT_LEVEL = 3
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 # -w
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
-LDFLAGS = ${STATIC_MAKE} -pthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
+CCFLAGS += -O${OPT_LEVEL} -fsigned-char -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64
+LDFLAGS += ${STATIC_MAKE} -pthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
CC_EXEC = gcc
-CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
+CC = ${CC_EXEC} ${CFLAGS} ${CPPFLAGS} ${CCFLAGS} -ggdb -fmessage-length=0
+CCFLAGS += -O${OPT_LEVEL} -fsigned-char -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
+LDFLAGS += ${STATIC_MAKE} -pthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
+CC = ${CC_EXEC} ${CCFLAGS} ${CFLAGS} ${CPPFLAGS} -fmessage-length=0 -ggdb
ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib
......@@ -29,9 +28,9 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
OPT_LEVEL = 3
include ../makefile.version
include make.version
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\"
-CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\" ${WARNING_LEVEL}
-LDFLAGS = -lpthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
+CCFLAGS += -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\"
+CCFLAGS += -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" ${WARNING_LEVEL}
+LDFLAGS += -lpthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
CC_EXEC = gcc
-CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
......@@ -39,3 +38,4 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
ALL_LIBS=LRMsorted-hashtable LRMbase-index LRMchro-event LRMhelper LRMseek-zlib LRMfile-io LRMhashtable
......@@ -2,7 +2,7 @@ Subject: Fix ftbfs on kfreebsd
From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/HelperFunctions.c
+++ subread/src/HelperFunctions.c
@@ -777,6 +777,9 @@
@@ -791,6 +791,9 @@
#if defined(FREEBSD) || defined(__MINGW32__)
return 1;
#else
......@@ -12,7 +12,7 @@ From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
#ifdef MACOS
int mib[6], x1, ret = 1;
size_t len;
@@ -867,6 +870,7 @@
@@ -881,6 +884,7 @@
return 1;
#endif
#endif
......
Description: Fix typo
Description: Fix typos
Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/sambam-file.c
+++ subread/src/sambam-file.c
@@ -186,7 +186,7 @@
@@ -187,7 +187,7 @@
return "MIDNSHP=X"[ch];
else
{
......@@ -11,3 +11,23 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
return 'M';
}
}
--- subread.orig/src/readSummary.c
+++ subread/src/readSummary.c
@@ -3691,7 +3691,7 @@
assigned_no++;
}
}
- if(is_etc) sprintf(final_feture_names + strlen(final_feture_names), "... (%d names ommited),", is_etc);
+ if(is_etc) sprintf(final_feture_names + strlen(final_feture_names), "... (%d names omitted),", is_etc);
final_feture_names[GENE_NAME_LIST_BUFFER_SIZE-1]=0;
if(RG_name){
@@ -6439,7 +6439,7 @@
else if(optarg[0]=='5')
reduce_5_3_ends_to_one = REDUCE_TO_5_PRIME_END;
else{
- SUBREADprintf("Invalide parameter to the --read2pos option: %s\n", optarg);
+ SUBREADprintf("Invalid parameter to the --read2pos option: %s\n", optarg);
STANDALONE_exit(-1);
}
}
......@@ -20,7 +20,7 @@ ifeq ($(DEB_HOST_ARCH_OS), kfreebsd)
endif
%:
dh $@ --parallel
dh $@
override_dh_clean:
cd src; make -f Makefile.Linux clean
......@@ -82,6 +82,10 @@ The output file is compatible with featureCounts program' \
$(HELP2MAN) --name='Remove duplicated reads' \
$(utildir)/removeDup| debian/filter.pl > $(mandir)/removeDup.1;
$(HELP2MAN) --name='Flatten features included in a GTF annotation and save the modified annotation \
to a SAF format file.' \
$(utildir)/flattenGTF| debian/filter.pl > $(mandir)/flattenGTF.1;
dh_auto_install
override_dh_installexamples-indep:
......
......@@ -12,3 +12,4 @@ bin/utilities/removeDup usr/lib/subread
bin/utilities/repair usr/lib/subread
bin/utilities/subread-fullscan usr/lib/subread
bin/utilities/txUnique usr/lib/subread
bin/utilities/flattenGTF usr/lib/subread
This diff is collapsed.
......@@ -52,6 +52,20 @@
#include "HelperFunctions.h"
char * get_short_fname(char * lname){
char * ret = lname;
int x1;
for(x1 = strlen(lname)-1; x1>=0; x1--){
if(lname [x1] == '/'){
ret = lname + x1 + 1;
break;
}
}
return ret;
}
// This assumes the first part of Cigar has differet strandness to the main part of the cigar.
// Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n').
......
......@@ -75,4 +75,6 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
void * context, int do_add_feature(char * gene_name, char * transcript_id, char * chrome_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) );
HashTable * load_alias_table(char * fname) ;
char * get_short_fname(char * lname);
#endif
#MACOS = -D MACOS
include makefile.version
CC_EXEC = gcc
OPT_LEVEL = 3
CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 # -w
include makefile.version
-include ~/.R/DBPZ_debug_makefile
CCFLAGS = -mtune=core2 ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
LDFLAGS = ${STATIC_MAKE} -pthread -lz -lm ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE
CC_EXEC = gcc
CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb
......@@ -14,11 +17,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount flattenGTF # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique ../bin/utilities
mv detectionCall repair coverageCount propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -34,6 +37,9 @@ sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
rm -f longread-one/*.o
cd longread-one && $(MAKE)
flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS}
detectionCall: detection-calls.c ${ALL_OBJECTS}
${CC} -o detectionCall detection-calls.c ${ALL_OBJECTS} ${LDFLAGS}
......
MACOS = -D MACOS
include makefile.version
CCFLAGS = -mtune=core2 ${MACOS} -O9 -w -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64
CCFLAGS = -mtune=core2 ${MACOS} -O9 -w -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL}
LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST
CC = gcc ${CCFLAGS} ${STATIC_MAKE} -ggdb -fomit-frame-pointer -O3 -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
......@@ -11,11 +11,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # globalReassembly testZlib
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount flattenGTF # globalReassembly testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped ../bin/utilities
mv repair coverageCount subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -31,6 +31,9 @@ sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
rm -f longread-one/*.o
cd longread-one && $(MAKE)
flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS}
repair: read-repair.c ${ALL_OBJECTS}
${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -264,7 +264,7 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
while(!feof(tmp_fp))
{
int type_char = fgetc(tmp_fp);
int type_char = fgetc(tmp_fp), rlen=-1;
if(type_char == EOF ) break;
fseek(tmp_fp, -1 , SEEK_CUR);
......@@ -273,7 +273,11 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
{
VCF_temp_read_t SNP_rec;
fread(&SNP_rec, sizeof(SNP_rec),1 , tmp_fp);
rlen = fread(&SNP_rec, sizeof(SNP_rec),1 , tmp_fp);
if(rlen < 1){
SUBREADputs("ERROR: the temporary file is broken.");
return -1;
}
if(!(*SNP_bitmap_recorder))
{
(*SNP_bitmap_recorder)=malloc((reference_len/8)+2);
......@@ -292,10 +296,25 @@ int read_tmp_block(struct SNP_Calling_Parameters * parameters, FILE * tmp_fp, ch
char read[MAX_READ_LENGTH];
char qual[MAX_READ_LENGTH];
fread(&read_rec, sizeof(read_rec), 1, tmp_fp);
fread(&read_len, sizeof(short), 1, tmp_fp);
fread(read, sizeof(char), read_len, tmp_fp);
int rlen = fread(qual, sizeof(char), read_len, tmp_fp);
int rlen = fread(&read_rec, sizeof(read_rec), 1, tmp_fp);
if(rlen < 1){
SUBREADputs("ERROR: the temporary file is broken.");
return -1;
}
rlen = fread(&read_len, sizeof(short), 1, tmp_fp);
if(rlen < 1){
SUBREADputs("ERROR: the temporary file is broken.");
return -1;
}
rlen = fread(read, sizeof(char), read_len, tmp_fp);
if(rlen < read_len){
SUBREADputs("ERROR: the temporary file is broken.");
return -1;
}
rlen = fread(qual, sizeof(char), read_len, tmp_fp);
if(rlen < read_len){
SUBREADputs("ERROR: the temporary file is broken.");
return -1;
......@@ -1803,9 +1822,9 @@ int main_snp_calling_test(int argc,char ** argv)
print_in_box(80,1,1,"exactSNP setting");
print_in_box(80,0,1,"");
print_in_box(80,0,0," Input file : %s (%s)", in_SAM_file, parameters.is_BAM_file_input?"BAM":"SAM");
print_in_box(80,0,0," Output file : %s", out_BED_file);
print_in_box(80,0,0," Reference genome : %s", in_FASTA_file);
print_in_box(80,0,0," Input file : %s (%s)", get_short_fname(in_SAM_file), parameters.is_BAM_file_input?"BAM":"SAM");
print_in_box(80,0,0," Output file : %s", get_short_fname(out_BED_file));
print_in_box(80,0,0," Reference genome : %s", get_short_fname(in_FASTA_file));
print_in_box(80,0,1,"");
print_in_box(80,0,0," Threads : %d", threads);
print_in_box(80,0,0," Min supporting reads : %d", parameters.min_supporting_read_number);
......@@ -1817,7 +1836,7 @@ int main_snp_calling_test(int argc,char ** argv)
print_in_box(80,0,0," P value upper bound : %.5f", parameters.cutoff_upper_bound);
print_in_box(80,0,0," Flanking windows size : %d", parameters.fisher_exact_testlen);
if(parameters.known_SNP_vcf[0])
print_in_box(80,0,0," Known SNP annotations : %s", parameters.known_SNP_vcf);
print_in_box(80,0,0," Known SNP annotations : %s", get_short_fname(parameters.known_SNP_vcf));
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
......
/***************************************************************
The Subread software package is free software package:
you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
Subread is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
Authors: Drs Yang Liao and Wei Shi
***************************************************************/
#include <stdio.h>
#include <ctype.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef WINDOWS
#define __i686__
#include <winsock2.h>
#else
#include <arpa/inet.h>
#endif
#include <unistd.h>
#include "subread.h"
#include "sublog.h"
#include "core.h"
#include "input-files.h"
#include "sorted-hashtable.h"
#include "core-indel.h"
#include "core-junction.h"
#define BASE_BLOCK_LENGTH_INDEX 15000000
#define BUCKET_SIZE_INDEX 10000
#define ntohll(x) ( ( (unsigned long long)(ntohl( (unsigned int )(((x) << 32) >> 32) ))<< 32) | ntohl( ((unsigned int)((x) >> 32)) ) )
#define htonll(x) ntohll(x)
char temp_file_prefix[MAX_FILE_NAME_LENGTH];
struct read_position_info
{
unsigned long long file_offset;
unsigned int chro_offset;
unsigned short flags;
unsigned short section_length;
char cigar[EXON_MAX_CIGAR_LEN];
};
// BASE_NUMBER_CURVE_POINTS should be (2+4*n) where n is an integer.
#define BASE_NUMBER_CURVE_POINTS 6
struct bucket_info
{
char chromosome_name[MAX_CHROMOSOME_NAME_LEN];
unsigned long long int L2index_offset;
unsigned int chromosome_offset;
unsigned int bases_in_bucket;
unsigned int reads_in_bucket;
unsigned short base_number_curve[BASE_NUMBER_CURVE_POINTS];
};
unsigned int calculate_read_span(char * cigar)
{
int xk0 = 0;
unsigned int x = 0, ret = 0;
while (1)
{
char nch = cigar[xk0];
if(!nch)break;
if(isdigit(nch))
x = x*10+nch-'0';
else
{
if(nch!='I') ret += x;
x=0;
}
xk0++;
}
return ret;
}
unsigned int transform_pillup_to_index(char * chromosome_name, unsigned int block_start, char * temp_file_name, FILE * L1index_fp, FILE * L2index_fp)
{
FILE * pile_fp = f_subr_open(temp_file_name,"rb");
if(!pile_fp) return 0;
int buckets_in_block = BASE_BLOCK_LENGTH_INDEX / BUCKET_SIZE_INDEX;
struct read_position_info ** buckets = malloc(sizeof(struct read_position_info *) * buckets_in_block);
unsigned int * bucket_counts = malloc(sizeof(int) * buckets_in_block);
unsigned int * bucket_sizes = malloc(sizeof(int) * buckets_in_block);
unsigned int * bucket_bases = malloc(sizeof(int) * buckets_in_block);
unsigned int * bucket_curve_bases = malloc(sizeof(int) * buckets_in_block * BASE_NUMBER_CURVE_POINTS);
unsigned int total_reads= 0;
memset(buckets, 0, sizeof(struct read_position_info *) * buckets_in_block);
memset(bucket_curve_bases, 0, sizeof(int) * buckets_in_block * BASE_NUMBER_CURVE_POINTS);
memset(bucket_counts, 0, sizeof(int) * buckets_in_block);
memset(bucket_sizes, 0, sizeof(int) * buckets_in_block);
while(!feof(pile_fp))
{
struct read_position_info record;
fread(&record, sizeof(record), 1, pile_fp);
unsigned int read_chro_offset = ntohl(record.chro_offset), window_pointer;
unsigned int read_span = calculate_read_span(record.cigar);
for(window_pointer = read_chro_offset; window_pointer < read_chro_offset + read_span; window_pointer += BUCKET_SIZE_INDEX )
{
if(window_pointer<block_start)continue;
int bucket_no = (window_pointer-block_start) / BUCKET_SIZE_INDEX;
if(bucket_no >=buckets_in_block) continue;
int bucket_curve_offset = (int)((window_pointer-block_start)/(BUCKET_SIZE_INDEX*1./BASE_NUMBER_CURVE_POINTS) - bucket_no * BASE_NUMBER_CURVE_POINTS);
struct read_position_info * bucket = buckets[bucket_no];
if(!bucket)
{
bucket = malloc(sizeof(struct read_position_info) * 10);
buckets[bucket_no] = bucket;
bucket_counts[bucket_no] = 0;
bucket_sizes[bucket_no] = 10;
}
if( bucket_counts[bucket_no] == bucket_sizes[bucket_no])
{
bucket = realloc(bucket, sizeof(struct read_position_info)*bucket_sizes[bucket_no]*1.5);
buckets[bucket_no] = bucket;
bucket_sizes[bucket_no] *= 1.5;
}
struct read_position_info * item = &(bucket[bucket_counts[bucket_no]]);
memcpy(item , &record , sizeof(struct read_position_info));
bucket_counts[bucket_no]++;
bucket_bases[bucket_no]+= ntohs(record.section_length);
bucket_curve_bases[bucket_no * BASE_NUMBER_CURVE_POINTS + bucket_curve_offset] += ntohs(record.section_length);
}
total_reads ++;
}
int xk1;
for(xk1 = 0; xk1 < buckets_in_block; xk1++)
{
unsigned long long int L2index_offset = ftello(L2index_fp);
struct bucket_info bucket_record;
strcpy(bucket_record.chromosome_name, chromosome_name);
bucket_record.chromosome_offset = htonl(block_start+xk1 * BUCKET_SIZE_INDEX);
bucket_record.reads_in_bucket = htonl(bucket_counts[xk1]);
bucket_record.bases_in_bucket = htonl(bucket_bases[xk1]);
bucket_record.L2index_offset = htonll(L2index_offset);
int xk2;
for(xk2 = xk1 * BASE_NUMBER_CURVE_POINTS; xk2< xk1 * BASE_NUMBER_CURVE_POINTS + BASE_NUMBER_CURVE_POINTS; xk2++)
{
if(ntohl(bucket_record.bases_in_bucket >0))
{
bucket_record.base_number_curve[xk2 - xk1 * BASE_NUMBER_CURVE_POINTS] = htons((unsigned short)(bucket_curve_bases[xk2] * 30000./ ntohl(bucket_record.bases_in_bucket)));
//printf("RV=%d; CV=%d; TOTAL=%d\n", bucket_curve_bases[xk2] , ntohs(bucket_record.base_number_curve[xk2 - xk1 * BASE_NUMBER_CURVE_POINTS]), ntohl( bucket_record.bases_in_bucket));
}
else bucket_record.base_number_curve[xk2 - xk1 * BASE_NUMBER_CURVE_POINTS] = 0;
}
fwrite(&bucket_record, sizeof(struct bucket_info), 1, L1index_fp);
fwrite(buckets[xk1] , sizeof(struct read_position_info) * bucket_counts[xk1],1, L2index_fp);
}
fclose(pile_fp);
unlink(temp_file_name);
free(buckets);
free(bucket_curve_bases);
free(bucket_counts);
free(bucket_sizes);
return total_reads;
}
int finalise_sam_index(HashTable * chromosome_size_table, char * output_file_prefix)
{
KeyValuePair * cursor;
char temp_file_name[MAX_FILE_NAME_LENGTH];
sprintf(temp_file_name, "%s.L1i",output_file_prefix);
FILE * L1index_fp = f_subr_open(temp_file_name,"wb");
sprintf(temp_file_name, "%s.L2i",output_file_prefix);
FILE * L2index_fp = f_subr_open(temp_file_name,"wb");
int bucket;
for(bucket=0; bucket<chromosome_size_table -> numOfBuckets; bucket++)
{
cursor = chromosome_size_table -> bucketArray[bucket];
while(1)
{
if(!cursor) break;
char * chro_name = (char *)cursor -> key;
unsigned int chro_max_length =cursor ->value - NULL - 1;
unsigned int chro_offset = 0;
SUBREADprintf(" === %s : 0 ~ %u === \n", chro_name , chro_max_length);
for(chro_offset = 0; chro_offset < chro_max_length; chro_offset += BASE_BLOCK_LENGTH_INDEX)
{
sprintf(temp_file_name,"%s@%s-%04u.bin", temp_file_prefix, chro_name , chro_offset / BASE_BLOCK_LENGTH_INDEX );
unsigned int total_reads = transform_pillup_to_index(chro_name, chro_offset, temp_file_name, L1index_fp, L2index_fp);
SUBREADprintf("%s has %d reads\n", temp_file_name, total_reads);
}
free(chro_name);
cursor = cursor->next;
}
}
fclose(L1index_fp);
fclose(L2index_fp);
return 0;
}
int write_read_pos(char * chro_name, unsigned int chro_offset_anchor, unsigned short section_chro_length, short flags, unsigned long long file_offset, char * cigar_str, unsigned int read_span, HashTable *pileup_fp_table)
{
char temp_file_name[MAX_FILE_NAME_LENGTH];
unsigned int window_point;
unsigned int last_window_no = 0xffffffff;
for(window_point = chro_offset_anchor; window_point < chro_offset_anchor+read_span; window_point += BUCKET_SIZE_INDEX)
{
unsigned int window_no = window_point / BASE_BLOCK_LENGTH_INDEX;
if(window_no != last_window_no)
{
int close_now = 0;
sprintf(temp_file_name,"%s@%s-%04u.bin", temp_file_prefix, chro_name , window_point / BASE_BLOCK_LENGTH_INDEX );
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table, &close_now);
struct read_position_info record;
record.chro_offset = htonl(chro_offset_anchor);
record.file_offset = htonll(file_offset);
record.section_length = htons(section_chro_length);
record.flags = htons(flags);
strcpy(record.cigar, cigar_str);
fwrite(&record, sizeof(record), 1, pileup_fp);
if(close_now) fclose(pileup_fp);
last_window_no = window_no;
}
}
return 0;
}
// returns 0 if there is no more sections; or 1.
int next_read_section(char * cigar, unsigned int * chro_offset, unsigned short * read_offset, unsigned short * section_chro_length, int * cigar_cursor)
{
int x=0, local_cigar_cursor = 0;
int nch;
unsigned int ret=0;
unsigned int all_chro_offset = 0;
unsigned short all_read_offset = 0;
unsigned int all_section_chro_length = 0;
while(1)
{
nch = cigar[local_cigar_cursor];
if(isdigit(nch))
{
x = x*10+(nch-'0');
}
else{
if(nch=='M' || nch == 'S')
{
(*chro_offset) = all_chro_offset;
(*read_offset) = all_read_offset;
(*section_chro_length) = all_section_chro_length;
}
if(nch=='M'||nch=='I' || nch=='S')
all_read_offset += x;
if(nch=='D'||nch=='N' || nch=='M' || nch=='S')
all_chro_offset += x;
if(nch=='M'||nch=='S'||nch=='D')
all_section_chro_length += x;
if(nch=='N') all_section_chro_length = 0;
x=0;
if((local_cigar_cursor> (*cigar_cursor) && (nch=='N' || nch == 'I' || nch == 'D')) || cigar[local_cigar_cursor+1] == 0)
{
(*cigar_cursor) = local_cigar_cursor+1;
return !(cigar[local_cigar_cursor+1] == 0);
}
}
local_cigar_cursor++;
if((*cigar_cursor) == local_cigar_cursor)all_section_chro_length = 0;
if( cigar[local_cigar_cursor]==0) return 0;
}
return ret;
}
int transfer_SAM_to_position_table(char * sam_file)
{
char linebuf[max(2*MAX_READ_LENGTH+300,3000)];
int allreads=0,mapped=0;
char mate_chro[MAX_CHROMOSOME_NAME_LEN+1];
unsigned int mate_pos;
int flags_mate;
char cigar_mate[EXON_MAX_CIGAR_LEN+1];
gene_input_t input_file;
HashTable * local_reassembly_pileup_files;
HashTable * chromosome_size_table;
chromosome_size_table = HashTableCreate(100);
HashTableSetDeallocationFunctions(chromosome_size_table, NULL, NULL);
HashTableSetKeyComparisonFunction(chromosome_size_table, my_strcmp);
HashTableSetHashFunction(chromosome_size_table, HashTableStringHashFunction);
local_reassembly_pileup_files = HashTableCreate(100);
HashTableSetDeallocationFunctions(local_reassembly_pileup_files, NULL, NULL);
HashTableSetKeyComparisonFunction(local_reassembly_pileup_files, my_strcmp);
HashTableSetHashFunction(local_reassembly_pileup_files ,HashTableStringHashFunction);
geinput_open_sam(sam_file, &input_file,0);
while(1)
{
unsigned int read_pos =0xffffffff;
char read_name[MAX_READ_NAME_LEN+1];
int flags;
char chro_name[MAX_CHROMOSOME_NAME_LEN+1];
unsigned int pos, pair_dist;
char cigar[EXON_MAX_CIGAR_LEN+1];
char read_text[MAX_READ_LENGTH+1];
char qual_text[MAX_READ_LENGTH+1];
int read_len, is_repeated, mapping_quality, is_anchor_certain=1;
int pos_delta = 0;
int is_paired_end_reads = 0;
unsigned long long int file_offset = ftello(input_file.input_fp);
if(feof((FILE *)input_file.input_fp))break;
read_text[0]=0;
qual_text[0]=0;
geinput_readline(&input_file, linebuf,0);
int res = parse_SAM_line(linebuf, read_name,& flags, chro_name, & pos, cigar, & mapping_quality, & pair_dist, read_text , qual_text, & read_len, & is_repeated);
int cigar_cursor = 0;
int firstM = 1,xx=0;
is_paired_end_reads = flags &1;
if(res==0)
{
for(; cigar[cigar_cursor]; cigar_cursor++)
{
char nch = cigar[cigar_cursor];
if(nch>='0'&&nch<='9')
{
xx=xx*10+(nch-'0');
}else
{
if(nch=='M') firstM=0;
else if(nch=='S')
{
if(firstM)
pos_delta = xx;
}
xx=0;
}
}
pos -= pos_delta;
}
if(res == 1) {chro_name[0]='*'; chro_name[1]=0;}
//printf("MAPPED=%d\n", res);
if(res == 0) // mapped
{
read_pos = pos - 1;
mapped++;
}
else if(res == 1 && is_paired_end_reads) // unmapped
{
is_anchor_certain=0;
if(mate_chro[0])
{
if(mate_chro[0]!='*')
{
read_pos = mate_pos ;
strcpy(chro_name, mate_chro);
}
//printf("RECOVERED 1: %u - %s ; LEN=%d ; YOU_ARE_FIRST=%d\n%s\n%s\n", read_anchor_position, read_name, read_len, flags_mate & SAM_FLAG_FIRST_READ_IN_PAIR, read_text, qual_text);
}
else
{
char read_text_null[MAX_READ_LENGTH+1];
char qual_text_null[MAX_READ_LENGTH+1];
geinput_readline_back(&input_file, linebuf);
res = parse_SAM_line(linebuf, read_name,& flags_mate, mate_chro, & mate_pos, cigar_mate, & mapping_quality, & pair_dist, read_text_null , qual_text_null, & read_len, & is_repeated);
if(res==0)
{
read_pos = mate_pos - 1;
strcpy(chro_name, mate_chro);
}
//printf("RECOVERED 2: %u - %s ; LEN=%d ; YOU_ARE_FIRST=%d\n%s\n%s\n", read_anchor_position, read_name, read_len, flags_mate & SAM_FLAG_FIRST_READ_IN_PAIR, read_text, qual_text);
}
}
if(read_pos<0xffff0000)
{
unsigned int read_span = calculate_read_span(cigar);
write_read_pos(chro_name, read_pos, read_len, flags, file_offset, cigar, read_span, local_reassembly_pileup_files);
unsigned int old_max_pos = HashTableGet(chromosome_size_table, chro_name) - NULL;
if(old_max_pos==0)
{
char * chro_name_new = malloc(strlen(chro_name)+1);
strcpy(chro_name_new , chro_name);
HashTablePut(chromosome_size_table, chro_name_new, NULL+read_pos + read_span+1);
}
else if(read_pos + read_span +1 > old_max_pos )
HashTablePutReplace(chromosome_size_table, chro_name, NULL+read_pos + read_span+1, 0);
}
if(allreads %2==1)
mate_chro[0] = 0;
else
{
strcpy(mate_chro, chro_name);
mate_pos = pos - 1;
flags_mate = flags;
}
allreads++;
}
SUBREADprintf("Processed %d reads; %d mapped.\n", allreads, mapped);
destroy_pileup_table(local_reassembly_pileup_files);
finalise_sam_index(chromosome_size_table, sam_file);
HashTableDestroy(chromosome_size_table);
return 0;
}
#ifdef MAKE_STANDALONE
int main(int argc , char ** argv)
{
//progress_report_callback = NULL;
#else
int samindex_main(int argc , char ** argv)
{
#endif
char c;
char in_SAM_file[MAX_FILE_NAME_LENGTH+1];
optind = 1;
opterr = 1;
optopt = 63;
while ((c = getopt(argc, argv, "i:?")) != -1)
{
switch(c)
{
case 'i':
strncpy(in_SAM_file, optarg, MAX_FILE_NAME_LENGTH);
break;
}
}
sprintf(temp_file_prefix, "./index-temp-sum-%06u-%06u", getpid(),rand());
int ret = 0;
ret = transfer_SAM_to_position_table(in_SAM_file);
return ret;
}
//removed.
......@@ -43,7 +43,7 @@ unsigned long long get_inner_pair(global_context_t * global_context , subread_re
bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int is_second_read, int load_more);
void bigtable_readonly_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t * return_ptr, subjunc_result_t * return_junction_ptr){
int rlen = -1;
if(global_context -> bigtable_cache_file_fp){
int loadjunc;
......@@ -67,7 +67,9 @@ void bigtable_readonly_result(global_context_t * global_context , thread_context
void * write_ptr = return_ptr;
if(loadjunc) write_ptr = return_junction_ptr;
fread(write_ptr, loadjunc?sizeof(subjunc_result_t):sizeof(mapping_result_t), 1, global_context -> bigtable_cache_file_fp);
rlen = fread(write_ptr, loadjunc?sizeof(subjunc_result_t):sizeof(mapping_result_t), 1, global_context -> bigtable_cache_file_fp);
if(rlen <1)
SUBREADprintf("UNABLE TO READ RESULT\n");
}
}else{
bigtable_cached_result_t * rett = bigtable_retrieve_cache(global_context , thread_context , pair_number, is_second_read,0);
......@@ -123,7 +125,7 @@ int init_bigtable_results(global_context_t * global_context, int is_rewinding)
if(global_context -> config.use_memory_buffer)
global_context -> bigtable_cache_file_fp = NULL;
else {
char tmpfname[MAX_FILE_NAME_LENGTH];
char tmpfname[MAX_FILE_NAME_LENGTH+33];
sprintf(tmpfname, "%s-%02d-align.bin", global_context -> config.temp_file_prefix, 0);
//if(is_rewinding) unlink(tmpfname);
......@@ -195,9 +197,6 @@ bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_con
if(global_context -> bigtable_cache_file_fp){
//SUBREADprintf("MARK_OCCPY=%lld BY THREAD %d\n", pair_number, thread_context ? thread_context -> thread_id : -1);
if(global_context -> bigtable_cache_file_loaded_fragments_begin == -1 || inner_pair_number >= global_context -> bigtable_cache_file_loaded_fragments_begin + global_context -> bigtable_chunked_fragments || inner_pair_number < global_context -> bigtable_cache_file_loaded_fragments_begin)
{
SUBREADprintf("THREAD # %d WAITING FOR %llu for RETRIEVE %llu\n", thread_context? thread_context -> thread_id:-1, global_context -> bigtable_cache_file_loaded_fragments_begin, pair_number);
......@@ -205,10 +204,11 @@ bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_con
}
bigtable_lock(global_context);
//SUBREADprintf("inner_pair_number=%lld, fragments_begin=%lld\n", inner_pair_number, global_context -> bigtable_cache_file_loaded_fragments_begn[thread_no]);
if(global_context -> bigtable_cache_file_loaded_fragments_begin == -1 || inner_pair_number >= global_context -> bigtable_cache_file_loaded_fragments_begin + global_context -> bigtable_chunked_fragments || inner_pair_number < global_context -> bigtable_cache_file_loaded_fragments_begin)
{
long long load_end_pair_no = load_start_pair_no + global_context -> bigtable_chunked_fragments;
int rlen = -1;
// this function will see if there is data to write or not.
bigtable_write_thread_cache(global_context);
......@@ -224,17 +224,35 @@ bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_con
{
for(xk2 = 0; xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++){
bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1* (1+global_context -> input_reads.is_paired_end_reads) + xk2;
fread( current_cache -> big_margin_data , sizeof(short) * 3 * global_context -> config.big_margin_record_size , 1, global_context -> bigtable_cache_file_fp );
fread( current_cache -> alignment_res , sizeof(mapping_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp );
rlen = fread( current_cache -> big_margin_data , sizeof(short) * 3 * global_context -> config.big_margin_record_size , 1, global_context -> bigtable_cache_file_fp );
if(rlen < 1){
SUBREADprintf("ERROR: cannot read margin\n");
return NULL;
}
if(global_context -> config.do_breakpoint_detection)
fread( current_cache -> subjunc_res , sizeof(subjunc_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp);
rlen = fread( current_cache -> alignment_res , sizeof(mapping_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp );
if(rlen < 1){
SUBREADprintf("ERROR: cannot read margin\n");
return NULL;
}
if(global_context -> config.do_breakpoint_detection){
rlen = fread( current_cache -> subjunc_res , sizeof(subjunc_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp);
if(rlen < 1){
SUBREADprintf("ERROR: cannot read margin\n");
return NULL;
}
}
}
}
}else{
long long new_file_size = calc_file_location(load_end_pair_no);
//SUBREADprintf("FILE_TRUNCATE %lld\n", load_start_pair_no);
ftruncate(fileno(global_context -> bigtable_cache_file_fp), new_file_size);
rlen = ftruncate(fileno(global_context -> bigtable_cache_file_fp), new_file_size);
if(rlen != 0){
SUBREADprintf("ERROR: cannot truncate file\n");
return NULL;
}
global_context -> bigtable_cache_file_fragments = load_end_pair_no;
int xk1, xk2;
for(xk1 = 0; xk1 < global_context -> bigtable_chunked_fragments; xk1++)
......@@ -313,7 +331,7 @@ int finalise_bigtable_results(global_context_t * global_context){
if(global_context -> bigtable_cache_file_fp){
fclose(global_context -> bigtable_cache_file_fp);
char tmpfname[MAX_FILE_NAME_LENGTH];
char tmpfname[MAX_FILE_NAME_LENGTH+33];
sprintf(tmpfname, "%s-%02d-align.bin", global_context -> config.temp_file_prefix, 0);
unlink(tmpfname);
}
......
......@@ -564,7 +564,7 @@ void remove_neighbour(global_context_t * global_context)
}
}
if((1 && global_context-> config.do_fusion_detection || 1 && global_context-> config.do_long_del_detection) && event_body -> event_type == CHRO_EVENT_TYPE_FUSION)
if((global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection) && event_body -> event_type == CHRO_EVENT_TYPE_FUSION)
{
for(xk2=-10 ; xk2 < 10 ; xk2++)
{
......@@ -736,10 +736,6 @@ int init_indel_thread_contexts(global_context_t * global_context, thread_context
memset(indel_thread_context -> final_reads_mismatches_array , 0, sizeof(unsigned short)*indel_context -> total_events);
memset(indel_thread_context -> final_counted_reads_array , 0, sizeof(unsigned short)*indel_context -> total_events);
thread_context -> output_buffer = malloc(sizeof(output_fragment_buffer_t) * global_context -> config.reported_multi_best_reads * MULTI_THREAD_OUTPUT_ITEMS);
thread_context -> output_buffer_item = 0;
thread_context -> output_buffer_pointer = 0;
subread_init_lock(&thread_context -> output_lock);
}
......@@ -1160,7 +1156,6 @@ int finalise_indel_and_junction_thread(global_context_t * global_context, thread
}
free(indel_thread_context -> final_counted_reads_array);
free(indel_thread_context -> final_reads_mismatches_array);
free(thread_context -> output_buffer);
free(indel_thread_context);
}
}
......@@ -1425,7 +1420,7 @@ void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int
mark_event_bitmap(event_table->appendix2, sides[1]);
}
}
int search_event(global_context_t * global_context, HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, char event_type, chromosome_event_t ** return_buffer)
int search_event(global_context_t * global_context, HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, unsigned char event_type, chromosome_event_t ** return_buffer)
{
int ret = 0;
......@@ -2037,7 +2032,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
if(first_correct_base - last_correct_base < -min(0,indels)) continue;
int best_j=0, best_score=-99999, is_ambiguous_indel= 0;
int best_score=-99999, is_ambiguous_indel= 0;
unsigned int best_pos = 0;
int cutoff = first_correct_base-last_correct_base + min(0, indels) - global_context -> config.flanking_subread_indel_mismatch;
int mid_j = (first_correct_base-last_correct_base )/2, min_j=99990;
......@@ -2064,7 +2059,6 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
mismatched_bases = first_correct_base-last_correct_base + min(0, indels) - score;
best_score = score;
best_j = j;
// best_pos here is the absolute offset of the FIRST UNWANTED BASE.
best_pos = voting_position + last_correct_base + j + last_indel;
min_j = abs(mid_j-j);
......@@ -2236,32 +2230,6 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
}
void print_indel_table(global_context_t * global_context){
int xk1;
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
HashTable * entry_table = indel_context -> event_entry_table;
for(xk1 = 0; xk1 < indel_context -> total_events ; xk1++){
chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
//printf("OCT27-STEP-INTAB-TYPE-%d POS %u~%u GID=%u PV %d %d SUP %d / %d\n", event_body -> event_type, event_body -> event_small_side, event_body -> event_large_side, event_body -> global_event_id, event_body -> connected_next_event_distance, event_body -> connected_previous_event_distance , event_body -> supporting_reads , event_body -> anti_supporting_reads);
}
int bucket;
KeyValuePair * cursor;
for(bucket=0; bucket< entry_table -> numOfBuckets; bucket++){
cursor = entry_table -> bucketArray[bucket];
while(cursor){
unsigned int entry_pos = cursor->key - NULL;
int * env_array = cursor->value;
int env_i;
for(env_i = 1; env_array[env_i]; env_i++){
chromosome_event_t * event_body = indel_context -> event_space_dynamic + (env_array[env_i]-1);
// printf("OCT27-STEPQ-ENTAB-%u [%d] to %u ~ %u len=%d VAL=%d PTR=%p\n",entry_pos, env_i, event_body -> event_small_side, event_body -> event_large_side, event_body -> indel_length, env_array[env_i], env_array);
}
cursor = cursor->next;
}
}
}
int write_indel_final_results(global_context_t * global_context)
......@@ -2273,8 +2241,8 @@ int write_indel_final_results(global_context_t * global_context)
char * fn2, *ref_bases, *alt_bases;
FILE * ofp = NULL;
fn2 = malloc(MAX_FILE_NAME_LENGTH);
snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.indel.vcf", global_context->config.output_prefix);
fn2 = malloc(MAX_FILE_NAME_LENGTH+30);
sprintf(fn2, "%s.indel.vcf", global_context->config.output_prefix);
ofp = f_subr_open(fn2, "wb");
......@@ -2481,7 +2449,7 @@ int write_local_reassembly(global_context_t *global_context, HashTable *pileup_f
if(0 == locate_gene_position_max(anchor_pos, &global_context -> chromosome_table, &chro_name, &chro_offset, NULL, NULL, read_len))
{
char temp_file_name[MAX_FILE_NAME_LENGTH];
char temp_file_name[MAX_FILE_NAME_LENGTH+40];
int close_now = 0;
sprintf(temp_file_name,"%s@%s-%04u.bin", global_context -> config.temp_file_prefix, chro_name , chro_offset / BASE_BLOCK_LENGTH );
......@@ -3422,7 +3390,7 @@ int full_indel_alignment(global_context_t * global_context, reassembly_by_voting
{
// indels_in_section = -5: 5 insertion; indels_in_section = 3: 3 deletion
// find the edge.
int best_edge_score = -1, best_left, best_right;
int best_edge_score = -1;
unsigned int section_best_edge = 0;
// the smallest possible location of the second half is the location if the first half + deletion length
......@@ -3438,8 +3406,6 @@ int full_indel_alignment(global_context_t * global_context, reassembly_by_voting
{
best_edge_score = left_matched+right_matched;
section_best_edge = xk2;
best_left = left_matched;
best_right = right_matched;
}
}
......@@ -3521,7 +3487,7 @@ int find_potential_ultralong_indels(global_context_t * global_context , reassemb
unsigned int xk1, is_1_left;
for(is_1_left = 0; is_1_left < 2; is_1_left ++)
{
int best_match = -1, best_c2_location = -1;
int best_match = -1;
char * test_key = is_1_left?contig_2:contig_1;
char * testee = is_1_left?contig_1:contig_2;
......@@ -3532,10 +3498,7 @@ int find_potential_ultralong_indels(global_context_t * global_context , reassemb
{
int test_match = match_str(test_key, testee + xk1, global_context -> config.reassembly_key_length);
if(test_match > best_match)
{
best_match = test_match;
best_c2_location = xk1;
}
}
if(best_match >= global_context -> config.reassembly_key_length-1)
......@@ -3797,11 +3760,24 @@ int finalise_pileup_file_by_voting(global_context_t * global_context , char * te
base_block_temp_read_t read_rec;
int rlen = fread(&read_rec, sizeof(read_rec), 1, tmp_fp);
int is_position_certain = read_rec.strand;
if(rlen<1) break;
fread(&read_len, sizeof(short), 1, tmp_fp);
fread(read_text, sizeof(char), read_len, tmp_fp);
fread(qual_text, sizeof(char), read_len, tmp_fp);
rlen = fread(&read_len, sizeof(short), 1, tmp_fp);
if(rlen < 1){
SUBREADprintf("ERROR: Unable to parse piles\n");
return -1;
}
rlen = fread(read_text, sizeof(char), read_len, tmp_fp);
if(rlen <read_len){
SUBREADprintf("ERROR: Unable to parse piles\n");
return -1;
}
rlen = fread(qual_text, sizeof(char), read_len, tmp_fp);
if(rlen <read_len){
SUBREADprintf("ERROR: Unable to parse piles\n");
return -1;
}
first_base_pos = read_rec.pos;
first_base_pos -= block_no * BASE_BLOCK_LENGTH;
......@@ -3878,9 +3854,23 @@ int finalise_pileup_file_by_voting(global_context_t * global_context , char * te
int is_position_certain = read_rec.strand;
if(rlen<1) break;
fread(&read_len, sizeof(short), 1, tmp_fp);
fread(read_text, sizeof(char), read_len, tmp_fp);
fread(qual_text, sizeof(char), read_len, tmp_fp);
rlen = fread(&read_len, sizeof(short), 1, tmp_fp);
if(rlen <1){
SUBREADprintf("ERROR: cannot parse piles.\n");
return -1;
}
rlen = fread(read_text, sizeof(char), read_len, tmp_fp);
if(rlen <read_len){
SUBREADprintf("ERROR: cannot parse piles.\n");
return -1;
}
rlen = fread(qual_text, sizeof(char), read_len, tmp_fp);
if(rlen <read_len){
SUBREADprintf("ERROR: cannot parse piles.\n");
return -1;
}
first_base_pos = read_rec.pos;
first_base_pos -= block_no * BASE_BLOCK_LENGTH;
assert(first_base_pos>=0);
......@@ -4242,9 +4232,12 @@ int finalise_pileup_file_by_debrujin(global_context_t * global_context , char *
base_block_temp_read_t read_rec;
int rlen = fread(&read_rec, sizeof(read_rec), 1, tmp_fp);
if(rlen<1) break;
fread(&read_len, sizeof(short), 1, tmp_fp);
fread(read_text, sizeof(char), read_len, tmp_fp);
fread(qual_text, sizeof(char), read_len, tmp_fp);
rlen = fread(&read_len, sizeof(short), 1, tmp_fp);
if(rlen<1) return -1;
rlen = fread(read_text, sizeof(char), read_len, tmp_fp);
if(rlen<read_len) return -1;
rlen = fread(qual_text, sizeof(char), read_len, tmp_fp);
if(rlen<read_len) return -1;
first_base_pos = read_rec.pos - block_no * BASE_BLOCK_LENGTH;
insert_pileup_read(global_context , block_context , first_base_pos , read_text , qual_text , read_len, read_rec.read_pos>0);
......@@ -4275,7 +4268,7 @@ int finalise_long_insertions_by_hashtable(global_context_t * global_context)
int chro_i;
assert(global_context -> index_block_number == 1);
unsigned int chro_start_pos = 0;
char tmp_fname[300];
char tmp_fname[350];
sprintf(tmp_fname,"%s.reassembly.fa", global_context->config.output_prefix);
global_context->long_insertion_FASTA_fp = f_subr_open(tmp_fname ,"wb");
......@@ -4286,7 +4279,7 @@ int finalise_long_insertions_by_hashtable(global_context_t * global_context)
unsigned int chro_current_pos = 0;
for(chro_current_pos = 0; chro_current_pos < chro_length ; chro_current_pos += BASE_BLOCK_LENGTH)
{
char temp_file_name[MAX_FILE_NAME_LENGTH];
char temp_file_name[MAX_FILE_NAME_LENGTH + 50];
int block_no = chro_current_pos / BASE_BLOCK_LENGTH;
sprintf(temp_file_name,"%s@%s-%04u.bin", global_context -> config.temp_file_prefix, global_context -> chromosome_table.read_names+chro_i*MAX_CHROMOSOME_NAME_LEN , block_no );
......@@ -4457,6 +4450,7 @@ void init_global_context(global_context_t * context)
context->config.is_BAM_output = 1;
context->config.is_BAM_input = 0;
context->config.is_input_read_order_required = 0;
context->config.read_trim_5 = 0;
context->config.read_trim_3 = 0;
context->config.minimum_exonic_subread_fraction = -1.0;
......
......@@ -167,7 +167,7 @@ int load_known_junctions(global_context_t * global_context);
int finalise_indel_and_junction_thread(global_context_t * global_context, thread_context_t * thread_contexts, int task);
int find_new_indels(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id);
int write_indel_final_results(global_context_t * context);
int search_event(global_context_t * global_context,HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, char event_type, chromosome_event_t ** return_buffer);
int search_event(global_context_t * global_context,HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, unsigned char event_type, chromosome_event_t ** return_buffer);
void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t * indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, int is_PE, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subreads_in_vote, int noninformative_subreads_in_vote, int major_indel_offset, int minor_indel_offset, int main_hamming, int minor_hamming, int main_quality, int minor_quality);
......
......@@ -61,6 +61,8 @@ static struct option long_options[] =
{"minVoteCutoff", required_argument, 0, 0},
{"maxRealignLocations", required_argument, 0, 0},
{"multiMapping", no_argument, 0, 0},
{"keepReadOrder", no_argument, 0, 0},
{"sortReadsByCoordinates", no_argument, 0, 0},
{0, 0, 0, 0}
};
......@@ -169,6 +171,17 @@ void print_usage_core_aligner()
SUBREADputs("");
SUBREADputs(" --rg <string> Add <tag:value> to the read group (RG) header in the output.");
SUBREADputs("");
SUBREADputs("# read order");
SUBREADputs("");
SUBREADputs(" --keepReadOrder Keep order of reads in BAM output the same as that in the");
SUBREADputs(" input file. Reads from the same pair are always placed next");
SUBREADputs(" to each other no matter this option is specified or not.");
SUBREADputs("");
SUBREADputs(" --sortReadsByCoordinates Output location-sorted reads. This option is");
SUBREADputs(" applicable for BAM output only. A BAI index file is also");
SUBREADputs(" generated for each BAM file so the BAM files can be directly");
SUBREADputs(" loaded into a genome browser.");
SUBREADputs("");
SUBREADputs("# color space reads");
SUBREADputs("");
SUBREADputs(" -b Convert color-space read bases to base-space read bases in");
......@@ -241,6 +254,8 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
opterr = 1;
optopt = 63;
subread_rebuild_cmd(argc, argv, global_context);
global_context->config.entry_program_name = CORE_PROGRAM_SUBREAD;
global_context->config.max_mismatch_exonic_reads = 3;
global_context->config.max_mismatch_junction_reads = 3;
......@@ -467,6 +482,10 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.report_multi_mapping_reads = 1;
}
else if(strcmp("ignoreUnmapped", long_options[option_index].name)==0)
{
global_context->config.ignore_unmapped_reads = 1;
}
else if(strcmp("memoryMultiplex", long_options[option_index].name)==0)
{
global_context->config.memory_use_multiplex = atof(optarg);
......@@ -494,6 +513,14 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.is_BAM_input = 1;
global_context->config.is_SAM_file_input = 1;
}
else if(strcmp("keepReadOrder", long_options[option_index].name)==0)
{
global_context->config.is_input_read_order_required=1;
}
else if(strcmp("sortReadsByCoordinates", long_options[option_index].name)==0)
{
global_context->config.sort_reads_by_coordinates=1;
}
else if(strcmp("extraColumns", long_options[option_index].name)==0)
{
global_context->config.SAM_extra_columns=1;
......@@ -516,10 +543,6 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
{
global_context -> config.fast_run = 1;
}
else if(strcmp("ignoreUnmapped", long_options[option_index].name)==0)
{
global_context->config.ignore_unmapped_reads = 1;
}
else if(strcmp("sv", long_options[option_index].name)==0)
{
global_context->config.do_breakpoint_detection = 1;
......