Skip to content
Commits on Source (9)
subread (1.6.4+dfsg-1) UNRELEASED; urgency=medium
* Update d/watch, add uversionmangle to remove -source suffix
* New upstream version 1.6.4+dfsg
* Refresh patches, update typo.patch
* Switch to debhelper-compat, drop d/compat
* Install genRandomReads binary and generate man page
-- Alexandre Mestiashvili <mestia@debian.org> Tue, 30 Apr 2019 10:58:08 +0200
subread (1.6.3+dfsg-1) unstable; urgency=medium
[ Alexandre Mestiashvili ]
......
......@@ -5,7 +5,7 @@ Uploaders: Alexandre Mestiashvili <mestia@debian.org>,
Section: science
Priority: optional
Build-Depends: bc,
debhelper (>= 12~),
debhelper-compat (= 12),
help2man,
python,
zlib1g-dev
......
......@@ -2,7 +2,7 @@ Subject: Fix ftbfs on kfreebsd
From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/HelperFunctions.c
+++ subread/src/HelperFunctions.c
@@ -791,6 +791,9 @@
@@ -807,6 +807,9 @@
#if defined(FREEBSD) || defined(__MINGW32__)
return 1;
#else
......@@ -12,7 +12,7 @@ From: Alex Mestiashvili <alex@biotec.tu-dresden.de>
#ifdef MACOS
int mib[6], x1, ret = 1;
size_t len;
@@ -883,6 +886,7 @@
@@ -899,6 +902,7 @@
return 1;
#endif
#endif
......
......@@ -2,7 +2,7 @@ Description: Fix typos
Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
--- subread.orig/src/sambam-file.c
+++ subread/src/sambam-file.c
@@ -204,7 +204,7 @@
@@ -205,7 +205,7 @@
return "MIDNSHP=X"[ch];
else
{
......@@ -13,7 +13,16 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
}
--- subread.orig/src/readSummary.c
+++ subread/src/readSummary.c
@@ -3748,7 +3748,7 @@
@@ -1256,7 +1256,7 @@
}else{
SUBREADprintf("ERROR: no features were loaded in format %s. The annotation format can be specified by the '-F' option%s.\n", file_type == FILE_TYPE_GTF?"GTF":"SAF", file_type == FILE_TYPE_GTF?", and the required feature type can be specified by the '-t' option.":"");
}
- SUBREADprintf("The porgram has to terminate.\n\n");
+ SUBREADprintf("The program has to terminate.\n\n");
return -2;
}
@@ -3785,7 +3785,7 @@
assigned_no++;
}
}
......@@ -22,7 +31,7 @@ Author: Alex Mestiashvili <alex@biotec.tu-dresden.de>
final_feture_names[GENE_NAME_LIST_BUFFER_SIZE-1]=0;
if(RG_name){
@@ -6503,7 +6503,7 @@
@@ -6560,7 +6560,7 @@
else if(optarg[0]=='5')
reduce_5_3_ends_to_one = REDUCE_TO_5_PRIME_END;
else{
......
......@@ -93,6 +93,9 @@ The output file is compatible with featureCounts program' \
to a SAF format file.' \
$(utildir)/flattenGTF| debian/filter.pl > $(mandir)/flattenGTF.1;
$(HELP2MAN) --name='helper tool from subread suite' \
$(utildir)/genRandomReads| debian/filter.pl > $(mandir)/genRandomReads.1;
dh_auto_install
override_dh_installexamples-indep:
......
......@@ -12,3 +12,4 @@ bin/utilities/repair usr/lib/subread
bin/utilities/subread-fullscan usr/lib/subread
bin/utilities/txUnique usr/lib/subread
bin/utilities/flattenGTF usr/lib/subread
bin/utilities/genRandomReads usr/lib/subread
version=4
opts=dversionmangle=s/\+dfsg(\.\d+)?$//,repacksuffix=+dfsg,repack,compression=xz \
opts=uversionmangle=s/-source//,dversionmangle=s/\+dfsg(\.\d+)?$//,repacksuffix=+dfsg,repack,compression=xz \
http://sf.net/subread/subread-@ANY_VERSION@-source@ARCHIVE_EXT@
This diff is collapsed.
This diff is collapsed.
......@@ -20,6 +20,7 @@
#ifndef __HELPER_FUNCTIONS_H_
#define __HELPER_FUNCTIONS_H_
#include "subread.h"
#include "hashtable.h"
#define PARSE_STATUS_TAGNAME 1
......@@ -31,6 +32,25 @@ typedef struct{
HashTable * size_table;
} fasta_contigs_t;
#ifndef MAKE_STANDALONE
typedef struct{
ArrayList * message_queue;
int is_thread_mode;
subread_lock_t queue_lock;
subread_lock_t queue_notifier;
int is_finished;
} message_queue_t;
extern message_queue_t mt_message_queue;
#endif
void msgqu_init();
void msgqu_destroy();
void msgqu_main_loop();
void msgqu_notifyFinish();
void msgqu_printf(const char * fmt, ...);
int read_contig_fasta(fasta_contigs_t * tab, char * fname);
int get_contig_fasta(fasta_contigs_t * tab, char * chro, unsigned int pos, int len, char * out_bases);
void destroy_contig_fasta(fasta_contigs_t * tab);
......@@ -77,4 +97,143 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
HashTable * load_alias_table(char * fname) ;
char * get_short_fname(char * lname);
// Rebuild a string containing the command line.
// Return the string length (without the terminating \0)
// You need to free(*lineptr) after all.
int rebuild_command_line(char ** lineptr, int argc, char ** argv);
// Calculate a full round of MD5 or SHA256.
void Helper_md5sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff);
typedef unsigned int HelpFuncMD5_u32plus;
typedef struct {
HelpFuncMD5_u32plus lo, hi;
HelpFuncMD5_u32plus a, b, c, d;
unsigned char buffer[64];
HelpFuncMD5_u32plus block[16];
} HelpFuncMD5_CTX;
void HelpFuncMD5_Init(HelpFuncMD5_CTX *ctx);
void HelpFuncMD5_Update(HelpFuncMD5_CTX *ctx, const void *data, unsigned long size);
void HelpFuncMD5_Final(unsigned char *result, HelpFuncMD5_CTX *ctx);
void Helper_sha256sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff);
unsigned long long plain_txt_to_long_rand(char * plain_txt, int plain_len);
// give me a p, I give you the value such that Pr( x < value ) == p in a 0/1 normal distribution.
double inverse_sample_normal(double p);
// big number functions
// retrived from https://github.com/kokke/tiny-TNbignum-c
// "This is free and unencumbered software released into the public domain."
#include <stdint.h>
#include <assert.h>
/* This macro defines the word size in bytes of the array that constitues the big-number data structure. */
#ifndef WORD_SIZE
#define WORD_SIZE 4
#endif
/* Size of big-numbers in bytes */
#define BN_MAXIMUM_BITS 4096
#define BN_ARRAY_SIZE ( BN_MAXIMUM_BITS / 8 / WORD_SIZE )
/* Here comes the compile-time specialization for how large the underlying array size should be. */
/* The choices are 1, 2 and 4 bytes in size with uint32, uint64 for WORD_SIZE==4, as temporary. */
#ifndef WORD_SIZE
#error Must define WORD_SIZE to be 1, 2, 4
#elif (WORD_SIZE == 1)
/* Data type of array in structure */
#define DTYPE uint8_t
/* bitmask for getting MSB */
#define DTYPE_MSB ((DTYPE_TMP)(0x80))
/* Data-type larger than DTYPE, for holding intermediate results of calculations */
#define DTYPE_TMP uint32_t
/* sprintf format string */
#define SPRINTF_FORMAT_STR "%.02x"
#define SSCANF_FORMAT_STR "%2hhx"
/* Max value of integer type */
#define MAX_VAL ((DTYPE_TMP)0xFF)
#elif (WORD_SIZE == 2)
#define DTYPE uint16_t
#define DTYPE_TMP uint32_t
#define DTYPE_MSB ((DTYPE_TMP)(0x8000))
#define SPRINTF_FORMAT_STR "%.04x"
#define SSCANF_FORMAT_STR "%4hx"
#define MAX_VAL ((DTYPE_TMP)0xFFFF)
#elif (WORD_SIZE == 4)
#define DTYPE uint32_t
#define DTYPE_TMP uint64_t
#define DTYPE_MSB ((DTYPE_TMP)(0x80000000))
#define SPRINTF_FORMAT_STR "%.08x"
#define SSCANF_FORMAT_STR "%8x"
#define MAX_VAL ((DTYPE_TMP)0xFFFFFFFF)
#endif
#ifndef DTYPE
#error DTYPE must be defined to uint8_t, uint16_t uint32_t or whatever
#endif
/* Custom assert macro - easy to disable */
#define require(p, msg) assert(p && #msg)
/* Data-holding structure: array of DTYPEs */
struct bn
{
DTYPE array[BN_ARRAY_SIZE];
};
/* Tokens returned by TNbignum_cmp() for value comparison */
enum { SMALLER = -1, EQUAL = 0, LARGER = 1 };
/* Initialization functions: */
void TNbignum_init(struct bn* n);
void TNbignum_from_int(struct bn* n, DTYPE_TMP i);
int TNbignum_to_int(struct bn* n);
void TNbignum_from_string(struct bn* n, char* str, int nbytes);
// warning: maxsize MUST >= 1026
void TNbignum_to_string(struct bn* n, char* str, int maxsize);
/* Basic arithmetic operations: */
void TNbignum_add(struct bn* a, struct bn* b, struct bn* c); /* c = a + b */
void TNbignum_sub(struct bn* a, struct bn* b, struct bn* c); /* c = a - b */
void TNbignum_mul(struct bn* a, struct bn* b, struct bn* c); /* c = a * b */
void TNbignum_div(struct bn* a, struct bn* b, struct bn* c); /* c = a / b */
void TNbignum_mod(struct bn* a, struct bn* b, struct bn* c); /* c = a % b */
void TNbignum_divmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d); /* c = a/b, d = a%b */
/* Bitwise operations: */
void TNbignum_and(struct bn* a, struct bn* b, struct bn* c); /* c = a & b */
void TNbignum_or(struct bn* a, struct bn* b, struct bn* c); /* c = a | b */
void TNbignum_xor(struct bn* a, struct bn* b, struct bn* c); /* c = a ^ b */
void TNbignum_lshift(struct bn* a, struct bn* b, int nbits); /* b = a << nbits */
void TNbignum_rshift(struct bn* a, struct bn* b, int nbits); /* b = a >> nbits */
/* Special operators and comparison */
int TNbignum_cmp(struct bn* a, struct bn* b); /* Compare: returns LARGER, EQUAL or SMALLER */
int TNbignum_is_zero(struct bn* n); /* For comparison with zero */
void TNbignum_inc(struct bn* n); /* Increment: add one to n */
void TNbignum_dec(struct bn* n); /* Decrement: subtract one from n */
void TNbignum_pow(struct bn* a, struct bn* b, struct bn* c); /* Calculate a^b -- e.g. 2^10 => 1024 */
void TNbignum_isqrt(struct bn* a, struct bn* b); /* Integer square root -- e.g. isqrt(5) => 2*/
void TNbignum_assign(struct bn* dst, struct bn* src); /* Copy src into dst -- dst := src */
#endif
......@@ -17,11 +17,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib
all: genRandomReads detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
mv detectionCall genRandomReads repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -37,6 +37,9 @@ sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
rm -f longread-one/*.o
cd longread-one && $(MAKE)
genRandomReads: gen_rand_reads.c ${ALL_OBJECTS}
${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS}
flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -11,11 +11,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib
all: genRandomReads sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
mv repair genRandomReads subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -31,6 +31,9 @@ sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
rm -f longread-one/*.o
cd longread-one && $(MAKE)
genRandomReads: gen_rand_reads.c ${ALL_OBJECTS}
${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS}
flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -88,6 +88,7 @@ struct SNP_Calling_Parameters{
gene_offset_t * subread_index_offsets;
gene_value_index_t * subread_index_array;
HashTable * cigar_event_table;
char * rebuilt_command_line;
unsigned long long int all_mapped_bases;
unsigned int fisher_normalisation_target;
......@@ -476,6 +477,9 @@ void fishers_test_on_POI(struct SNP_Calling_Parameters * parameters, float * snp
}
}
}
#define KEEP_WINDOW_SAME_SIZE 0
void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * snp_fisher_raw, unsigned int * snp_voting_piles, char * referenced_genome, unsigned int reference_len, double multiplex_base, char * SNP_bitmap_recorder, unsigned short * snp_filter_background_matched, unsigned short * snp_filter_background_unmatched, int all_result_needed, char * chro_name, unsigned int block_start) {
int i, j, is_fresh_jumppd, remove_old;
......@@ -491,8 +495,7 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
{
a=0; c=0; is_fresh_jumppd = 1;
if(i>=0)
{
if(i>=0) {
char true_value = referenced_genome[i];
int true_value_int = (true_value=='A'?0:(true_value=='C'?1:(true_value=='G'?2:3)));
......@@ -506,7 +509,7 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
// if the POI reached in this step is not fresh (known SNPs with < 80% support)
if(SNP_bitmap_recorder && is_snp_bitmap(SNP_bitmap_recorder,i) && (a *4 >= c))
if(KEEP_WINDOW_SAME_SIZE && SNP_bitmap_recorder && is_snp_bitmap(SNP_bitmap_recorder,i))
{
is_fresh_jumppd = 0;
go_ahead --;
......@@ -529,18 +532,25 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
mm += snp_voting_piles[ (i+parameters -> fisher_exact_testlen + go_ahead) *4 + j ] ;
}
if((!SNP_bitmap_recorder)||(!is_snp_bitmap(SNP_bitmap_recorder, go_ahead + i + parameters -> fisher_exact_testlen)) || (mm *4 < pm))
unsigned int chro_pos = block_start + i + parameters -> fisher_exact_testlen;
if(0 && chro_pos < 16084550+12 && chro_pos > 16084550-12){
SUBREADprintf("ADD-BASE-AT-RIGHT : %s : %u : KNOWN-SNP : %d a b c d : %d %d %d %d A 4 > C : %d\n" , chro_name, chro_pos, is_snp_bitmap(SNP_bitmap_recorder,i + parameters -> fisher_exact_testlen), a,b,c,d, (a *4 >= c));
}
// if this is not POI : add this matched/unmatched into "flanking+PoI" counts.
if((!SNP_bitmap_recorder)||(!is_snp_bitmap(SNP_bitmap_recorder, go_ahead + i + parameters -> fisher_exact_testlen)))
{
d += pm;
b += mm;
is_added = 1;
}
if(is_added) break;
if(is_added || (!KEEP_WINDOW_SAME_SIZE)) break;
else go_ahead++;
}
}
// test the middle base
// a : mismatched at POI ; c : matched at POI
if(i>=0 && a > 0){
float observed_coverage = (b+d) *1./(1. + 2 * parameters -> fisher_exact_testlen) ;
......@@ -550,7 +560,14 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
int flanking_unmatched = b-a;
int flanking_matched = d-c;
if(SNP_bitmap_recorder && is_snp_bitmap(SNP_bitmap_recorder,i) && (a *4 >= c))
unsigned int chro_pos = block_start + i;
if(0 && chro_pos < 16084550+12 && chro_pos > 16084550-12){
SUBREADprintf("KNOWN-SNP-AT-PoI at %s : %u : KNOWN-SNP : %d a b c d : %d %d %d %d A 4 > C : %d\n" , chro_name, chro_pos, is_snp_bitmap(SNP_bitmap_recorder,i), a,b,c,d, (a *4 >= c));
}
if(0 && SNP_bitmap_recorder && is_snp_bitmap(SNP_bitmap_recorder,i) && (a *4 >= c)) // no reason to include the current POI into the flanking window, even if it is a SNP.
{
flanking_unmatched = b;
flanking_matched = d;
......@@ -558,9 +575,9 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
unsigned int chro_pos = 1 + block_start + i;
if(0 && chro_pos < 11109861+10 && chro_pos > 11109861-10)
SUBREADprintf("TEST: %s : %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", chro_name, chro_pos ,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
if(0 && chro_pos < 16084550+12 && chro_pos > 16084550-12){
SUBREADprintf("TEST: %s : %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n KNOWN-SNP : %d A 4 > C : %d\n", chro_name, chro_pos ,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff, is_snp_bitmap(SNP_bitmap_recorder,i), (a *4 >= c));
}
if(all_result_needed || ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16))
snp_fisher_raw [i] = p_middle;
......@@ -590,20 +607,20 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
else
mm += snp_voting_piles[ (i-parameters -> fisher_exact_testlen-left_tail) *4 + j ] ;
}
if((!SNP_bitmap_recorder)||!is_snp_bitmap(SNP_bitmap_recorder, i - parameters -> fisher_exact_testlen-left_tail) || (mm*4<pm))
if((!SNP_bitmap_recorder)||!is_snp_bitmap(SNP_bitmap_recorder, i - parameters -> fisher_exact_testlen-left_tail))
{
d-=pm;
b-=mm;
is_removed =1;
}
if(is_removed) break;
else left_tail--;
if(is_removed || !KEEP_WINDOW_SAME_SIZE) break;
else if(KEEP_WINDOW_SAME_SIZE) left_tail--;
}
}
// if a fresh base is at POI, the tail is removed in next step.
if(is_fresh_jumppd)
if(is_fresh_jumppd || !KEEP_WINDOW_SAME_SIZE)
remove_old = 1;
else{
left_tail ++;
......@@ -922,7 +939,7 @@ int process_snp_votes(FILE *out_fp, unsigned int offset , unsigned int reference
sprintf( BGC_Qvalue_str, ";CTRL_DP=%d;CTRL_MM=%d;CTRL_QV=%.4f;VS_QV=%.4f",BGC_all_reads, BGC_alt_reads, BGC_Qvalue,max(0,VS_Qvalue));
}
snprintf(sprint_line,999, "%s\t%u\t.\t%c\t%s\t%.4f\t.\tDP=%d;MM=%s;BGTOTAL=%d;BGMM=%d%s\n", chro_name, BASE_BLOCK_LENGTH*block_no +1 + i, true_value,base_list, Qvalue, all_reads, supporting_list , snp_filter_background_matched[i]+snp_filter_background_unmatched[i], snp_filter_background_unmatched[i], BGC_Qvalue_str);
snprintf(sprint_line,999, "%s\t%u\t.\t%c\t%s\t%.4f\t.\tDP=%d;MMsum=%d;MM=%s;BGTOTAL=%d;BGMM=%d%s\n", chro_name, BASE_BLOCK_LENGTH*block_no +1 + i, true_value,base_list, Qvalue, all_reads, POI_MM, supporting_list , snp_filter_background_matched[i]+snp_filter_background_unmatched[i], snp_filter_background_unmatched[i], BGC_Qvalue_str);
if(parameters->output_fp_lock)
subread_lock_occupy(parameters->output_fp_lock);
int sprint_line_len = strlen(sprint_line);
......@@ -1204,6 +1221,7 @@ int parse_read_lists_maybe_threads(char * in_FASTA_file, char * out_BED_file, ch
SUBREADprintf("Cannot open the output file: '%s'\n", out_BED_file);
}
fputs("##fileformat=VCFv4.0\n",out_fp);
fprintf(out_fp, "##exactSNP_Commandline=%s\n", parameters -> rebuilt_command_line);
fputs("##comment=The QUAL values for the SNPs in this VCF file are calculated as min(40, - log_10 (p_value)), where p_value is from the Fisher's Exact Test. The QUAL values for the Indels in this VCF file are always 1.0.\n", out_fp);
fputs("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">\n", out_fp);
fputs("##INFO=<ID=BGMM,Number=1,Type=Integer,Description=\"Number of mismatched bases in the background (for SNP only)\">\n", out_fp);
......@@ -1385,9 +1403,6 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
chromosome_t * known_chromosomes;
unsigned int real_read_count=0;
double start_time = miltime();
unsigned short rand48_seed[3];
int i, fpos;
signal (SIGTERM, EXSNP_SIGINT_hook);
......@@ -1453,7 +1468,6 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
HashTableSetDeallocationFunctions(parameters-> cigar_event_table, NULL, NULL);
HashTableSetKeyComparisonFunction(parameters-> cigar_event_table, my_strcmp);
memcpy(rand48_seed, &start_time, 6);
char mac_rand[13];
mac_or_rand_str(mac_rand);
......@@ -1473,7 +1487,7 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
strncpy(one_fn, in_SAM_file+fpos0, fpos-fpos0);
one_fn[fpos-fpos0]=0;
if(break_SAM_file(one_fn, parameters -> is_BAM_file_input, temp_file_prefix, &real_read_count, &parameters->all_blocks, known_chromosomes, 1, parameters -> bases_ignored_head_tail, parameters->subread_index_array, parameters->subread_index_offsets, &parameters -> all_mapped_bases, parameters-> cigar_event_table, parameters->known_SNP_vcf)) return -1;
if(break_SAM_file(one_fn, parameters -> is_BAM_file_input, temp_file_prefix, &real_read_count, &parameters->all_blocks, known_chromosomes, 1, parameters -> bases_ignored_head_tail, parameters->subread_index_array, parameters->subread_index_offsets, &parameters -> all_mapped_bases, parameters-> cigar_event_table, parameters->known_SNP_vcf, NULL, 1,0)) return -1;
if(!in_SAM_file[fpos]) break;
fpos++;
}
......@@ -1481,7 +1495,7 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
{
char temp_file_prefix2[350];
sprintf(temp_file_prefix2, "%sBGC-", temp_file_prefix);
if(break_SAM_file(parameters -> background_input_file, parameters -> is_BAM_file_input, temp_file_prefix2, NULL, NULL, known_chromosomes, 1, parameters -> bases_ignored_head_tail, parameters->subread_index_array, parameters->subread_index_offsets, NULL, NULL, NULL)) return -1;
if(break_SAM_file(parameters -> background_input_file, parameters -> is_BAM_file_input, temp_file_prefix2, NULL, NULL, known_chromosomes, 1, parameters -> bases_ignored_head_tail, parameters->subread_index_array, parameters->subread_index_offsets, NULL, NULL, NULL, NULL, 1,0)) return -1;
}
......@@ -1575,6 +1589,8 @@ void print_usage_snp(char * myname)
SUBREADputs("");
SUBREADputs(" -v output version of the program.");
SUBREADputs("");
SUBREADputs(" -C <path> Specify the path to save the temporary files.");
SUBREADputs("");
SUBREADputs("Example:");
SUBREADputs("");
SUBREADputs(" ./exactSNP -i my-alignment.sam -g mm10.fa -o my-SNPs.txt");
......@@ -1625,6 +1641,7 @@ int main_snp_calling_test(int argc,char ** argv)
memset(&parameters, 0, sizeof(struct SNP_Calling_Parameters));
rebuild_command_line(&parameters . rebuilt_command_line, argc, argv);
parameters.start_time = miltime();
parameters.empty_blocks = 0;
parameters.reported_SNPs = 0;
......@@ -1668,7 +1685,7 @@ int main_snp_calling_test(int argc,char ** argv)
print_usage_snp(argv[0]);
return 0;
}
while ((c = getopt_long (argc, argv, "7:N:a:i:g:o:bQ:p:f:n:r:x:w:s:t:T:v4",snp_long_options, &optindex))!=-1)
while ((c = getopt_long (argc, argv, "7:N:C:a:i:g:o:bQ:p:f:n:r:x:w:s:t:T:v4",snp_long_options, &optindex))!=-1)
{
switch (c)
{
......@@ -1747,6 +1764,10 @@ int main_snp_calling_test(int argc,char ** argv)
parameters.max_supporting_read_number = atof(optarg);
break;
case 'C':
strncpy(temp_path, optarg,299);
break;
case 'r':
parameters.min_supporting_read_number = atof(optarg);
break;
......@@ -1828,6 +1849,7 @@ int main_snp_calling_test(int argc,char ** argv)
print_in_box(80,0,0," Input file : %s (%s)", get_short_fname(in_SAM_file), parameters.is_BAM_file_input?"BAM":"SAM");
print_in_box(80,0,0," Output file : %s", get_short_fname(out_BED_file));
print_in_box(80,0,0," Reference genome : %s", get_short_fname(in_FASTA_file));
print_in_box(80,0,0," Temp path : %s", temp_path);
print_in_box(80,0,1,"");
print_in_box(80,0,0," Threads : %d", threads);
print_in_box(80,0,0," Min supporting reads : %d", parameters.min_supporting_read_number);
......@@ -1842,7 +1864,7 @@ int main_snp_calling_test(int argc,char ** argv)
print_in_box(80,0,0," Known SNP annotations : %s", get_short_fname(parameters.known_SNP_vcf));
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
print_in_box(80,2,1,"");
SUBREADputs("");
......@@ -1857,6 +1879,9 @@ int main_snp_calling_test(int argc,char ** argv)
warning_file_type(in_FASTA_file, FILE_TYPE_FASTA);
warning_file_limit();
int x1;
if(temp_path[0]==0){
// temp path = out_dir
for(x1 = strlen(out_BED_file); x1 >= 0; x1--){
if(out_BED_file[x1]=='/'){
memcpy(temp_path, out_BED_file, x1);
......@@ -1865,6 +1890,7 @@ int main_snp_calling_test(int argc,char ** argv)
}
}
if(temp_path[0]==0)strcpy(temp_path, "./");
}
ret = SNP_calling(in_SAM_file, out_BED_file, in_FASTA_file, temp_path, read_count, threads, &parameters);
if(ret != -1)
{
......@@ -1884,7 +1910,7 @@ int main_snp_calling_test(int argc,char ** argv)
print_in_box(80,0,1,"");
print_in_box(80,0,0," Running time : %.1f minutes", (miltime() - parameters.start_time)/60);
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
print_in_box(80,2,1,"");
SUBREADputs("");
}
......
......@@ -281,7 +281,7 @@ int print_configuration_forindel(global_context_t * global_context)
print_in_box(80,0,0," Expected Paired distance : %d", global_context->config.expected_pair_distance);
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
print_in_box(80,2,1,"");
SUBREADputs("");
print_in_box(80,1,1,"Running");
print_in_box(80,0,1,"");
......@@ -303,7 +303,7 @@ int print_summary(global_context_t * global_context)
print_in_box(80,0,0," De novo indels : %u", global_context -> all_indels);
print_in_box(80,0,0," Time cost : %.1f minutes.", timepass/60);
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
print_in_box(80,2,1,"");
return 0;
}
......
......@@ -1291,7 +1291,9 @@ typedef struct {
} do_load_juncs_context_t;
int do_juncs_add_feature(char * gene_name, char * transcript_id, char * chro_name, unsigned int feature_start, unsigned int feature_end, int is_negative_strand, void * context){
//#warning ">>>>>>> COMMENt NEXT <<<<<<<<<<<<<<"
//#warning ">>>>>>>>>>>>>>>>> COMMENT NEXT <<<<<<<<<<<<<<<<<<<<"
//return 0;
//#warning ">>>>>>>>>>>>>>>>> COMMENT NEXT <<<<<<<<<<<<<<<<<<<<"
//SUBREADprintf("INJ LOCS: %s : %u, %u\n", chro_name, feature_start, feature_end);
do_load_juncs_context_t * do_load_juncs_context = context;
HashTable * feature_sorting_table = do_load_juncs_context -> feature_sorting_table;
......@@ -1462,8 +1464,6 @@ void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int
unsigned int * id_list = HashTableGet(event_table, NULL+sides[xk1]);
if(!id_list)
{
//#warning "====== DO NOT NEED TO CLEAR THE MEMORY BUFFER! MALLOC IS GOOD ======"
//id_list = calloc(sizeof(unsigned int),EVENT_ENTRIES_INIT_SIZE);
id_list = malloc(sizeof(unsigned int)*(1+EVENT_ENTRIES_INIT_SIZE));
id_list[0]=EVENT_ENTRIES_INIT_SIZE;
id_list[1]=0;
......@@ -1510,9 +1510,6 @@ int search_event(global_context_t * global_context, HashTable * event_table, chr
int current_size = res[0]&0x0fffffff;
for(xk2=1; xk2< current_size+1 ; xk2++)
{
if(0 && res[xk2] > 520000){
SUBREADprintf("TOO LARGE EVENT : %u ; POS=%d/%u\n", res[xk2] , xk2, res[0]);
}
if(!res[xk2])break;
//if(res[xk2] - 1>= ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> current_max_event_number ) { SUBREADprintf("FATAL ERROR: Event id out-of-boundary: %u > %u!\n", res[xk2], ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> current_max_event_number ); continue;}
chromosome_event_t * event_body = &event_space[res[xk2]-1];
......@@ -2520,7 +2517,7 @@ int write_local_reassembly(global_context_t *global_context, HashTable *pileup_f
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table, &close_now);
//assert(read_len == strlen(read_text) && read_len > 90);
write_read_block_file(pileup_fp , 0, read_name, 0, chro_name , chro_offset, NULL, 0, read_text , qual_text, read_len , 1 , is_anchor_certain , anchor_pos, read_len);
write_read_block_file(pileup_fp , 0, read_name, 0, chro_name , chro_offset, NULL, 0, read_text , qual_text, read_len , 1 , is_anchor_certain , anchor_pos, read_len, 0);
if(close_now) fclose(pileup_fp);
}
......@@ -4545,9 +4542,12 @@ void init_global_context(global_context_t * context)
context->config.all_threads = 1;
context->config.is_first_iteration_running = 1;
context->config.is_second_iteration_running = 1;
context->config.reads_per_chunk = 1024*1024*1024;
context->config.reads_per_chunk = 20*1024*1024;
//#warning "=========== 2*1024*1024 IS FOR TESTING BLOCKING AND SHOULD BE COMMENTED ==============="
// context->config.reads_per_chunk = 2*1024*1024;
context->config.use_memory_buffer = 1;
context->config.is_methylation_reads = 0;
context->config.report_no_unpaired_reads = 0;
......@@ -4588,7 +4588,7 @@ void init_global_context(global_context_t * context)
int seed_rand[2];
double double_time = miltime();
memcpy(seed_rand, &double_time, 2*sizeof(int));
myrand_srand(seed_rand[0]^seed_rand[1]);
myrand_srand(seed_rand[0]^seed_rand[1]); // the seed is NOT used in R because myrand_srand will always takes four random numbers from R's RNG
context->config.max_indel_length = 5;
context->config.phred_score_format = FASTQ_PHRED33;
......
......@@ -31,7 +31,7 @@
//#define MAX_EVENT_ENTRIES_PER_SITE 12
//
#define EVENT_ENTRIES_INIT_SIZE (9)
#define MAX_EVENT_ENTRIES_PER_SITE (9)
#define MAX_EVENT_ENTRIES_PER_SITE 9
#define CHRO_EVENT_TYPE_REMOVED 0
#define CHRO_EVENT_TYPE_INDEL 8
#define CHRO_EVENT_TYPE_LONG_INDEL 16
......
......@@ -7,6 +7,7 @@
#include "subread.h"
#include "input-files.h"
#include "core.h"
#include "HelperFunctions.h"
static struct option long_options[] =
{
......@@ -84,7 +85,8 @@ void print_usage_core_aligner()
SUBREADputs(" -r <string> Name of an input read file. If paired-end, this should be");
SUBREADputs(" the first read file (typically containing \"R1\"in the file");
SUBREADputs(" name) and the second should be provided via \"-R\".");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ and FASTA.");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ, gzipped");
SUBREADputs(" FASTA and FASTA.");
SUBREADputs(" These formats are identified automatically.");
SUBREADputs(" ");
SUBREADputs(" -t <int> Type of input sequencing data. Its values include");
......@@ -663,6 +665,7 @@ int main_align(int argc , char ** argv)
// printf("SIZE_OF_ALN=%d\n", sizeof(mapping_result_t));
// printf("SIZE_OF_VOT=%d\n", sizeof(voting_context_t));
return core_main(argc, argv, parse_opts_aligner);
int ret = core_main(argc, argv, parse_opts_aligner);
return ret;
}
......@@ -7,7 +7,7 @@
#include "subread.h"
#include "input-files.h"
#include "core.h"
#include "HelperFunctions.h"
static struct option long_options[] =
{
......@@ -88,7 +88,8 @@ void print_usage_core_subjunc()
SUBREADputs(" -r <string> Name of an input read file. If paired-end, this should be");
SUBREADputs(" the first read file (typically containing \"R1\"in the file");
SUBREADputs(" name) and the second should be provided via \"-R\".");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ and FASTA.");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ, gzipped");
SUBREADputs(" FASTA and FASTA.");
SUBREADputs(" These formats are identified automatically.");
SUBREADputs("");
SUBREADputs("## Optional arguments:");
......@@ -694,6 +695,7 @@ int subread_subjunc_main(int argc , char ** argv)
int main_junction(int argc , char ** argv)
{
#endif
return core_main(argc, argv, parse_opts_subjunc);
int ret = core_main(argc, argv, parse_opts_subjunc);
return ret;
}
......@@ -4031,6 +4031,9 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
int new_event_type =(((global_context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && global_context -> config.do_fusion_detection)||(global_context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && global_context -> config.do_long_del_detection))&& !global_context -> config.prefer_donor_receptor_junctions)?CHRO_EVENT_TYPE_FUSION:CHRO_EVENT_TYPE_JUNCTION;
//#warning "=========================== DELETE NEXT LINE !!! =================================="
//new_event_type = CHRO_EVENT_TYPE_REMOVED;
if(is_strand_jumped) new_event_type = CHRO_EVENT_TYPE_FUSION;
if((subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) ==1)
new_event_type = CHRO_EVENT_TYPE_FUSION;
......