New upstream version 1.6.4+dfsg

parent edf71ef6
This diff is collapsed.
This diff is collapsed.
......@@ -20,6 +20,7 @@
#ifndef __HELPER_FUNCTIONS_H_
#define __HELPER_FUNCTIONS_H_
#include "subread.h"
#include "hashtable.h"
#define PARSE_STATUS_TAGNAME 1
......@@ -31,6 +32,25 @@ typedef struct{
HashTable * size_table;
} fasta_contigs_t;
#ifndef MAKE_STANDALONE
// State of the global message queue that is exposed as `mt_message_queue` and
// driven by the msgqu_* functions declared below this struct.
// NOTE(review): the per-field notes are inferred from the field names and types
// only; the msgqu_* implementation is not visible here -- confirm before relying on them.
typedef struct{
// List holding the queued messages (presumably the text produced by msgqu_printf).
ArrayList * message_queue;
// Flag; presumably non-zero when messages are queued for another thread instead of printed directly -- TODO confirm.
int is_thread_mode;
// Lock object; presumably guards access to message_queue -- TODO confirm.
subread_lock_t queue_lock;
// Second lock object; presumably used to wake up the consumer in msgqu_main_loop -- TODO confirm.
subread_lock_t queue_notifier;
// Flag; presumably set by msgqu_notifyFinish to end msgqu_main_loop -- TODO confirm.
int is_finished;
} message_queue_t;
extern message_queue_t mt_message_queue;
#endif
void msgqu_init();
void msgqu_destroy();
void msgqu_main_loop();
void msgqu_notifyFinish();
void msgqu_printf(const char * fmt, ...);
int read_contig_fasta(fasta_contigs_t * tab, char * fname);
int get_contig_fasta(fasta_contigs_t * tab, char * chro, unsigned int pos, int len, char * out_bases);
void destroy_contig_fasta(fasta_contigs_t * tab);
......@@ -77,4 +97,143 @@ int load_features_annotation(char * file_name, int file_type, char * gene_id_col
HashTable * load_alias_table(char * fname) ;
char * get_short_fname(char * lname);
// Rebuild a string containing the command line.
// Return the string length (without the terminating \0)
// You need to free(*lineptr) after all.
int rebuild_command_line(char ** lineptr, int argc, char ** argv);
// Calculate a full round of MD5 or SHA256.
void Helper_md5sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff);
typedef unsigned int HelpFuncMD5_u32plus;
// Working state handed to HelpFuncMD5_Init / HelpFuncMD5_Update / HelpFuncMD5_Final.
// NOTE(review): the field roles below follow the customary layout of public-domain
// MD5 implementations; the .c file is not visible here -- confirm against it.
typedef struct {
// presumably the low/high words of the running input-length counter
HelpFuncMD5_u32plus lo, hi;
// presumably the four 32-bit words of the MD5 chaining state
HelpFuncMD5_u32plus a, b, c, d;
// 64-byte buffer (one MD5 input block is 64 bytes); presumably holds a partially filled block between Update calls
unsigned char buffer[64];
// sixteen 32-bit words; presumably the current block decoded into words
HelpFuncMD5_u32plus block[16];
} HelpFuncMD5_CTX;
void HelpFuncMD5_Init(HelpFuncMD5_CTX *ctx);
void HelpFuncMD5_Update(HelpFuncMD5_CTX *ctx, const void *data, unsigned long size);
void HelpFuncMD5_Final(unsigned char *result, HelpFuncMD5_CTX *ctx);
void Helper_sha256sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff);
unsigned long long plain_txt_to_long_rand(char * plain_txt, int plain_len);
// give me a p, I give you the value such that Pr( x < value ) == p in a 0/1 normal distribution.
double inverse_sample_normal(double p);
// big number functions
// retrieved from https://github.com/kokke/tiny-bignum-c
// "This is free and unencumbered software released into the public domain."
#include <stdint.h>
#include <assert.h>
/* This macro defines the word size in bytes of the array that constitutes the big-number data structure. */
#ifndef WORD_SIZE
#define WORD_SIZE 4
#endif
/* Size of big-numbers in bytes */
#define BN_MAXIMUM_BITS 4096
#define BN_ARRAY_SIZE ( BN_MAXIMUM_BITS / 8 / WORD_SIZE )
/* Here comes the compile-time specialization for how large the underlying array size should be. */
/* The choices are 1, 2 and 4 bytes in size with uint32, uint64 for WORD_SIZE==4, as temporary. */
#ifndef WORD_SIZE
#error Must define WORD_SIZE to be 1, 2, 4
#elif (WORD_SIZE == 1)
/* Data type of array in structure */
#define DTYPE uint8_t
/* bitmask for getting MSB */
#define DTYPE_MSB ((DTYPE_TMP)(0x80))
/* Data-type larger than DTYPE, for holding intermediate results of calculations */
#define DTYPE_TMP uint32_t
/* sprintf format string */
#define SPRINTF_FORMAT_STR "%.02x"
#define SSCANF_FORMAT_STR "%2hhx"
/* Max value of integer type */
#define MAX_VAL ((DTYPE_TMP)0xFF)
#elif (WORD_SIZE == 2)
#define DTYPE uint16_t
#define DTYPE_TMP uint32_t
#define DTYPE_MSB ((DTYPE_TMP)(0x8000))
#define SPRINTF_FORMAT_STR "%.04x"
#define SSCANF_FORMAT_STR "%4hx"
#define MAX_VAL ((DTYPE_TMP)0xFFFF)
#elif (WORD_SIZE == 4)
#define DTYPE uint32_t
#define DTYPE_TMP uint64_t
#define DTYPE_MSB ((DTYPE_TMP)(0x80000000))
#define SPRINTF_FORMAT_STR "%.08x"
#define SSCANF_FORMAT_STR "%8x"
#define MAX_VAL ((DTYPE_TMP)0xFFFFFFFF)
#endif
#ifndef DTYPE
#error DTYPE must be defined to uint8_t, uint16_t uint32_t or whatever
#endif
/* Custom assert macro - easy to disable.
 * `p` is the condition to check; `msg` is stringified and AND-ed onto it only
 * so that the text appears in the assertion-failure message.
 * The condition is parenthesized so that operators with lower precedence than
 * `&&` inside `p` (assignment, ?:, ||) cannot capture the appended `&& #msg`. */
#define require(p, msg) assert((p) && #msg)
/* Data-holding structure: a fixed-size array of BN_ARRAY_SIZE words of type DTYPE.
 * BN_ARRAY_SIZE is BN_MAXIMUM_BITS / 8 / WORD_SIZE, and DTYPE is WORD_SIZE bytes
 * wide, so every big number occupies BN_MAXIMUM_BITS bits regardless of WORD_SIZE. */
struct bn
{
DTYPE array[BN_ARRAY_SIZE];
};
/* Tokens returned by TNbignum_cmp() for value comparison */
enum { SMALLER = -1, EQUAL = 0, LARGER = 1 };
/* Initialization functions: */
void TNbignum_init(struct bn* n);
void TNbignum_from_int(struct bn* n, DTYPE_TMP i);
int TNbignum_to_int(struct bn* n);
void TNbignum_from_string(struct bn* n, char* str, int nbytes);
// warning: maxsize MUST be >= 1026
void TNbignum_to_string(struct bn* n, char* str, int maxsize);
/* Basic arithmetic operations: */
void TNbignum_add(struct bn* a, struct bn* b, struct bn* c); /* c = a + b */
void TNbignum_sub(struct bn* a, struct bn* b, struct bn* c); /* c = a - b */
void TNbignum_mul(struct bn* a, struct bn* b, struct bn* c); /* c = a * b */
void TNbignum_div(struct bn* a, struct bn* b, struct bn* c); /* c = a / b */
void TNbignum_mod(struct bn* a, struct bn* b, struct bn* c); /* c = a % b */
void TNbignum_divmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d); /* c = a/b, d = a%b */
/* Bitwise operations: */
void TNbignum_and(struct bn* a, struct bn* b, struct bn* c); /* c = a & b */
void TNbignum_or(struct bn* a, struct bn* b, struct bn* c); /* c = a | b */
void TNbignum_xor(struct bn* a, struct bn* b, struct bn* c); /* c = a ^ b */
void TNbignum_lshift(struct bn* a, struct bn* b, int nbits); /* b = a << nbits */
void TNbignum_rshift(struct bn* a, struct bn* b, int nbits); /* b = a >> nbits */
/* Special operators and comparison */
int TNbignum_cmp(struct bn* a, struct bn* b); /* Compare: returns LARGER, EQUAL or SMALLER */
int TNbignum_is_zero(struct bn* n); /* For comparison with zero */
void TNbignum_inc(struct bn* n); /* Increment: add one to n */
void TNbignum_dec(struct bn* n); /* Decrement: subtract one from n */
void TNbignum_pow(struct bn* a, struct bn* b, struct bn* c); /* Calculate a^b -- e.g. 2^10 => 1024 */
void TNbignum_isqrt(struct bn* a, struct bn* b); /* Integer square root -- e.g. isqrt(5) => 2*/
void TNbignum_assign(struct bn* dst, struct bn* src); /* Copy src into dst -- dst := src */
#endif
......@@ -17,11 +17,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib
all: genRandomReads detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv detectionCall repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
mv detectionCall genRandomReads repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -37,6 +37,9 @@ sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
rm -f longread-one/*.o
cd longread-one && $(MAKE)
genRandomReads: gen_rand_reads.c ${ALL_OBJECTS}
${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS}
flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS}
......
......@@ -11,11 +11,11 @@ ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib
all: genRandomReads sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib
mkdir -p ../bin/utilities
mv longread-one/LRM longread-one/sublong
mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
mv repair subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
mv repair genRandomReads subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities
@echo
@echo "###########################################################"
@echo "# #"
......@@ -31,6 +31,9 @@ sublong: longread-one/longread-mapping.c ${ALL_OBJECTS}
rm -f longread-one/*.o
cd longread-one && $(MAKE)
genRandomReads: gen_rand_reads.c ${ALL_OBJECTS}
${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS}
flattenGTF: flattenAnnotations.c ${ALL_OBJECTS}
${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS}
......
This diff is collapsed.
......@@ -281,7 +281,7 @@ int print_configuration_forindel(global_context_t * global_context)
print_in_box(80,0,0," Expected Paired distance : %d", global_context->config.expected_pair_distance);
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
print_in_box(80,2,1,"");
SUBREADputs("");
print_in_box(80,1,1,"Running");
print_in_box(80,0,1,"");
......@@ -303,7 +303,7 @@ int print_summary(global_context_t * global_context)
print_in_box(80,0,0," De novo indels : %u", global_context -> all_indels);
print_in_box(80,0,0," Time cost : %.1f minutes.", timepass/60);
print_in_box(80,0,1,"");
print_in_box(80,2,1,"http://subread.sourceforge.net/");
print_in_box(80,2,1,"");
return 0;
}
......
......@@ -1291,7 +1291,9 @@ typedef struct {
} do_load_juncs_context_t;
int do_juncs_add_feature(char * gene_name, char * transcript_id, char * chro_name, unsigned int feature_start, unsigned int feature_end, int is_negative_strand, void * context){
//#warning ">>>>>>> COMMENt NEXT <<<<<<<<<<<<<<"
//#warning ">>>>>>>>>>>>>>>>> COMMENT NEXT <<<<<<<<<<<<<<<<<<<<"
//return 0;
//#warning ">>>>>>>>>>>>>>>>> COMMENT NEXT <<<<<<<<<<<<<<<<<<<<"
//SUBREADprintf("INJ LOCS: %s : %u, %u\n", chro_name, feature_start, feature_end);
do_load_juncs_context_t * do_load_juncs_context = context;
HashTable * feature_sorting_table = do_load_juncs_context -> feature_sorting_table;
......@@ -1462,8 +1464,6 @@ void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int
unsigned int * id_list = HashTableGet(event_table, NULL+sides[xk1]);
if(!id_list)
{
//#warning "====== DO NOT NEED TO CLEAR THE MEMORY BUFFER! MALLOC IS GOOD ======"
//id_list = calloc(sizeof(unsigned int),EVENT_ENTRIES_INIT_SIZE);
id_list = malloc(sizeof(unsigned int)*(1+EVENT_ENTRIES_INIT_SIZE));
id_list[0]=EVENT_ENTRIES_INIT_SIZE;
id_list[1]=0;
......@@ -1510,9 +1510,6 @@ int search_event(global_context_t * global_context, HashTable * event_table, chr
int current_size = res[0]&0x0fffffff;
for(xk2=1; xk2< current_size+1 ; xk2++)
{
if(0 && res[xk2] > 520000){
SUBREADprintf("TOO LARGE EVENT : %u ; POS=%d/%u\n", res[xk2] , xk2, res[0]);
}
if(!res[xk2])break;
//if(res[xk2] - 1>= ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> current_max_event_number ) { SUBREADprintf("FATAL ERROR: Event id out-of-boundary: %u > %u!\n", res[xk2], ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> current_max_event_number ); continue;}
chromosome_event_t * event_body = &event_space[res[xk2]-1];
......@@ -2520,7 +2517,7 @@ int write_local_reassembly(global_context_t *global_context, HashTable *pileup_f
FILE * pileup_fp = get_temp_file_pointer(temp_file_name, pileup_fp_table, &close_now);
//assert(read_len == strlen(read_text) && read_len > 90);
write_read_block_file(pileup_fp , 0, read_name, 0, chro_name , chro_offset, NULL, 0, read_text , qual_text, read_len , 1 , is_anchor_certain , anchor_pos, read_len);
write_read_block_file(pileup_fp , 0, read_name, 0, chro_name , chro_offset, NULL, 0, read_text , qual_text, read_len , 1 , is_anchor_certain , anchor_pos, read_len, 0);
if(close_now) fclose(pileup_fp);
}
......@@ -4545,9 +4542,12 @@ void init_global_context(global_context_t * context)
context->config.all_threads = 1;
context->config.is_first_iteration_running = 1;
context->config.is_second_iteration_running = 1;
context->config.reads_per_chunk = 1024*1024*1024;
context->config.reads_per_chunk = 20*1024*1024;
//#warning "=========== 2*1024*1024 IS FOR TESTING BLOCKING AND SHOULD BE COMMENTED ==============="
// context->config.reads_per_chunk = 2*1024*1024;
context->config.use_memory_buffer = 1;
context->config.is_methylation_reads = 0;
context->config.report_no_unpaired_reads = 0;
......@@ -4588,7 +4588,7 @@ void init_global_context(global_context_t * context)
int seed_rand[2];
double double_time = miltime();
memcpy(seed_rand, &double_time, 2*sizeof(int));
myrand_srand(seed_rand[0]^seed_rand[1]);
myrand_srand(seed_rand[0]^seed_rand[1]); // the seed is NOT used in R because myrand_srand will always takes four random numbers from R's RNG
context->config.max_indel_length = 5;
context->config.phred_score_format = FASTQ_PHRED33;
......
......@@ -31,7 +31,7 @@
//#define MAX_EVENT_ENTRIES_PER_SITE 12
//
#define EVENT_ENTRIES_INIT_SIZE (9)
#define MAX_EVENT_ENTRIES_PER_SITE (9)
#define MAX_EVENT_ENTRIES_PER_SITE 9
#define CHRO_EVENT_TYPE_REMOVED 0
#define CHRO_EVENT_TYPE_INDEL 8
#define CHRO_EVENT_TYPE_LONG_INDEL 16
......
......@@ -7,6 +7,7 @@
#include "subread.h"
#include "input-files.h"
#include "core.h"
#include "HelperFunctions.h"
static struct option long_options[] =
{
......@@ -84,7 +85,8 @@ void print_usage_core_aligner()
SUBREADputs(" -r <string> Name of an input read file. If paired-end, this should be");
SUBREADputs(" the first read file (typically containing \"R1\"in the file");
SUBREADputs(" name) and the second should be provided via \"-R\".");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ and FASTA.");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ, gzipped");
SUBREADputs(" FASTA and FASTA.");
SUBREADputs(" These formats are identified automatically.");
SUBREADputs(" ");
SUBREADputs(" -t <int> Type of input sequencing data. Its values include");
......@@ -663,6 +665,7 @@ int main_align(int argc , char ** argv)
// printf("SIZE_OF_ALN=%d\n", sizeof(mapping_result_t));
// printf("SIZE_OF_VOT=%d\n", sizeof(voting_context_t));
return core_main(argc, argv, parse_opts_aligner);
int ret = core_main(argc, argv, parse_opts_aligner);
return ret;
}
......@@ -7,7 +7,7 @@
#include "subread.h"
#include "input-files.h"
#include "core.h"
#include "HelperFunctions.h"
static struct option long_options[] =
{
......@@ -88,7 +88,8 @@ void print_usage_core_subjunc()
SUBREADputs(" -r <string> Name of an input read file. If paired-end, this should be");
SUBREADputs(" the first read file (typically containing \"R1\"in the file");
SUBREADputs(" name) and the second should be provided via \"-R\".");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ and FASTA.");
SUBREADputs(" Acceptable formats include gzipped FASTQ, FASTQ, gzipped");
SUBREADputs(" FASTA and FASTA.");
SUBREADputs(" These formats are identified automatically.");
SUBREADputs("");
SUBREADputs("## Optional arguments:");
......@@ -694,6 +695,7 @@ int subread_subjunc_main(int argc , char ** argv)
int main_junction(int argc , char ** argv)
{
#endif
return core_main(argc, argv, parse_opts_subjunc);
int ret = core_main(argc, argv, parse_opts_subjunc);
return ret;
}
......@@ -4031,6 +4031,9 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
int new_event_type =(((global_context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && global_context -> config.do_fusion_detection)||(global_context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && global_context -> config.do_long_del_detection))&& !global_context -> config.prefer_donor_receptor_junctions)?CHRO_EVENT_TYPE_FUSION:CHRO_EVENT_TYPE_JUNCTION;
//#warning "=========================== DELETE NEXT LINE !!! =================================="
//new_event_type = CHRO_EVENT_TYPE_REMOVED;
if(is_strand_jumped) new_event_type = CHRO_EVENT_TYPE_FUSION;
if((subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) ==1)
new_event_type = CHRO_EVENT_TYPE_FUSION;
......
This diff is collapsed.
......@@ -13,7 +13,7 @@ int main(int argc, char ** argv)
for(chro_offset = 0; chro_offset<500000; chro_offset++)
{
if(chro_offset % 70==0) puts("");
char nch_i = rand() % 4 ;
char nch_i = myrand_rand() % 4 ;
char nch = nch_i?(nch_i<2?'T':(nch_i < 3?'C':'G')):'A';
putchar(nch);
......
This diff is collapsed.
......@@ -2651,7 +2651,6 @@ void remove_indel_neighbours(HashTable * indel_table)
/* Print the version banner through SUBREADprintf: an empty line, then
 * "Subread <SUBREAD_VERSION>", the project URL and a closing empty line.
 * NOTE(review): this text comes from a diff hunk in which one line of the
 * function was dropped; confirm which of the lines below is still present. */
void print_version_info()
{
SUBREADprintf("\nSubread %s\n", SUBREAD_VERSION);
SUBREADprintf("http://www.sourceforge.net/projects/subread/\n");
SUBREADprintf("\n");
}
......
......@@ -193,7 +193,7 @@ int gvindex_load(gene_value_index_t * index, const char filename [])
int read_length;
read_length = fread(&index->start_point,4,1, fp);
if(read_length<1){
SUBREADprintf("ERROR: the array index is incomplete : %d", read_length );
SUBREADprintf("ERROR: the array index is incomplete : %d\n", read_length );
return 1;
}
read_length = fread(&index->length,4,1, fp);
......@@ -217,7 +217,7 @@ int gvindex_load(gene_value_index_t * index, const char filename [])
read_length =fread(index->values, 1, useful_bytes+1, fp);
if(read_length < useful_bytes){
SUBREADprintf("ERROR: the array index is incomplete : %d < %d.", read_length, useful_bytes+1 );
SUBREADprintf("ERROR: the array index is incomplete : %d < %d.\n", read_length, useful_bytes+1 );
return 1;
}
......
......@@ -6,7 +6,7 @@
* Released to the public domain.
*
*--------------------------------------------------------------------------
* $Id: hashtable.c,v 9999.23 2018/05/05 00:47:52 cvs Exp $
* $Id: hashtable.c,v 9999.32 2019/01/31 03:15:09 cvs Exp $
\*--------------------------------------------------------------------------*/
#include <stdio.h>
......@@ -23,6 +23,40 @@ static unsigned long pointerHashFunction(const void *pointer);
static int isProbablePrime(long number);
static long calculateIdealNumOfBuckets(HashTable *hashTable);
/* Build a non-negative pseudo-random `long` from eight calls to myrand_rand().
 *
 * The first call contributes its low 7 bits and each of the next seven calls
 * contributes its low 8 bits, i.e. at most 63 random bits.  The bits are
 * collected in an unsigned accumulator and cut down to the positive range of
 * `long` at the end.  Shifting a signed `long` directly (as was done before)
 * overflows -- undefined behaviour -- when `long` is narrower than 64 bits and
 * could hand a negative value to callers that use the result as an index.
 * Where `long` is 64 bits wide the returned values are unchanged.
 *
 * Returns -1, after printing a diagnostic, when RAND_MAX < 255.
 */
long long_random_val(){
	if(RAND_MAX<255){
		SUBREADprintf("Is this a embedded computer????\n");
		return -1;
	}

	unsigned long long bits = 0;
	int i;
	for(i=0;i<8;i++){
		// only the very first byte is limited to 7 bits so that the top bit stays clear
		unsigned long long byte_mask = (i>0) ? 0xffULL : 0x7fULL;
		bits = (bits << 8) ^ ((unsigned long long)myrand_rand() & byte_mask);
	}

	// all value bits of `long` set, i.e. the largest positive long
	unsigned long long positive_long_mask = ((unsigned long)-1) >> 1;
	return (long)(bits & positive_long_mask);
}
/* Remove the element at index 0 and return it.
 * Every remaining element is moved one slot towards the front and the element
 * count is decremented.  Returns NULL when the list holds no element.
 * No deallocator is called here on the returned element. */
void * ArrayListShift(ArrayList * list){
	if(list->numOfElements<1) return NULL;

	void * head = list->elementList[0];
	long dst = 0;

	list->numOfElements--;
	while(dst < list->numOfElements){
		// pull the successor down into the slot that was just vacated
		list->elementList[dst] = list->elementList[dst+1];
		dst++;
	}
	return head;
}
/* Remove the last element and return it; returns NULL when the list is empty.
 * Only the element count is decremented; no deallocator is called here. */
void * ArrayListPop(ArrayList * list){
	if(list->numOfElements<1) return NULL;

	list->numOfElements -= 1;
	// after the decrement the count is exactly the index of the former last element
	return list->elementList[list->numOfElements];
}
/* Return one element picked at a pseudo-random index (long_random_val() modulo
 * the element count).  The list itself is not modified.
 *
 * Returns NULL when the list is empty -- the same convention as ArrayListShift
 * and ArrayListPop -- instead of computing a modulo by zero, and also when
 * long_random_val() yields a negative value, which would otherwise produce a
 * negative index. */
void * ArrayListRandom(ArrayList * list){
	if(list->numOfElements<1) return NULL;

	long draw = long_random_val();
	if(draw<0) return NULL; // long_random_val() reports failure as -1

	return list -> elementList[draw % list -> numOfElements];
}
ArrayList * ArrayListCreate(int init_capacity){
ArrayList * ret = malloc(sizeof(ArrayList));
......@@ -130,6 +164,36 @@ void ArrayListSort(ArrayList * list, int compare_L_minus_R(void * L_elem, void *
merge_sort(sortdata, list -> numOfElements, ArrayListSort_compare, ArrayListSort_exchange, ArrayListSort_merge);
}
/* Comparison callback for ArrayListSort() on lists whose element pointers
 * carry unsigned integer values rather than addresses of objects.
 *
 * Returns -1 if L_elem < R_elem, 1 if L_elem > R_elem and 0 if they are equal.
 *
 * The pointer values are turned into integers with a cast.  The previous
 * `ptr - NULL` form relied on arithmetic on `void *` (a compiler extension)
 * and on subtracting pointers that do not belong to one array. */
int ArrayListLLUComparison(void * L_elem, void * R_elem){
	unsigned long long lint = (unsigned long long)(size_t)L_elem;
	unsigned long long rint = (unsigned long long)(size_t)R_elem;

	// (a > b) - (a < b) gives 1, 0 or -1 without branching
	return (lint > rint) - (lint < rint);
}
/* Find the index of the first element whose value is strictly greater than
 * `value_less_than_dent`.  The element pointers are interpreted as unsigned
 * integer values and the list must be sorted in ascending order.
 *
 * Returns:
 *   >= 0  index of the first element greater than value_less_than_dent;
 *   -1    the list is empty, or its last element is <= value_less_than_dent;
 *   -2    no such element was found although the last one is greater, which
 *         can only happen on an unsorted list (a diagnostic is printed).
 *
 * Changes from the earlier version: an empty list is rejected up front (it
 * used to read elementList[-1]), and the pointer values are converted with an
 * integer cast instead of `ptr - NULL`, whose signed result is sign-extended
 * before the unsigned comparison when ptrdiff_t is narrower than 64 bits.
 */
long ArrayListFindNextDent(ArrayList * list, unsigned long long value_less_than_dent){
	if(list->numOfElements<1) return -1l;

	long h=list->numOfElements-1,l=0,m=-1l;
	if( (unsigned long long)(size_t)list -> elementList[h] <= value_less_than_dent )return -1l;

	// Coarse binary search: it only has to land near the answer, the two scans below make it exact.
	while(h>l){
		m=(h+l)/2;
		unsigned long long mv = (unsigned long long)(size_t)list -> elementList[m];
		if(mv < value_less_than_dent) l=m+1;
		else if(mv > value_less_than_dent) h=m-1;
		else break;
	}

	// Step back a little from where the search stopped.
	if(m<2)m=0;
	else m-=2;

	// Walk backwards past every element >= the value, so the forward scan starts at or before the answer.
	for(; m>=0 && (unsigned long long)(size_t)list -> elementList[m] >= value_less_than_dent; m--);

	// Walk forwards to the first element that is strictly greater.
	for(m = max(0, m); m < list->numOfElements ; m++){
		if( (unsigned long long)(size_t)list -> elementList[m] > value_less_than_dent )return m;
	}

	SUBREADprintf("ALGORITHM ERROR!! DID YOU SORT THE LIST???\n");
	return -2l;
}
/*--------------------------------------------------------------------------*\
* NAME:
* HashTableCreate() - creates a new HashTable
......@@ -346,13 +410,13 @@ int HashTablePutReplaceEx(HashTable *hashTable, const void *key, void *value, in
if(replace_key) {
if(hashTable->keyDeallocator && dealloc_key)
hashTable->keyDeallocator((void *) pair->key);
pair->key = key;
pair->key = (void *)key;
}
}
if (pair->value != value) {
if (hashTable->valueDeallocator != NULL && dealloc_value)
hashTable->valueDeallocator(pair->value);
pair->value = value;
pair->value = (void *)value;
}
}
else {
......@@ -361,8 +425,8 @@ int HashTablePutReplaceEx(HashTable *hashTable, const void *key, void *value, in
return -1;
}
else {
newPair->key = key;
newPair->value = value;
newPair->key = (void *)key;
newPair->value = (void *)value;
newPair->next = hashTable->bucketArray[hashValue];
hashTable->bucketArray[hashValue] = newPair;
hashTable->numOfElements++;
......@@ -399,6 +463,18 @@ int HashTablePutReplace(HashTable *hashTable, const void *key, void *value, int
* doesn't exist in the HashTable
\*--------------------------------------------------------------------------*/
/* Look up `key` and return the key pointer that is stored in the table for it
 * (not the associated value), or NULL when no entry compares equal.
 * The bucket is chosen with the table's hash function and the chain is walked
 * until the table's key comparison function reports 0. */
void *HashTableGetKey(const HashTable *hashTable, const void *key) {
    long bucket = hashTable->hashFunction(key) % hashTable->numOfBuckets;
    KeyValuePair *node;

    for (node = hashTable->bucketArray[bucket]; node != NULL; node = node->next) {
        if (hashTable->keycmp(key, node->key) == 0)
            return node->key;
    }

    return NULL;
}
void *HashTableGet(const HashTable *hashTable, const void *key) {
long hashValue = hashTable->hashFunction(key) % hashTable->numOfBuckets;
......
......@@ -6,7 +6,7 @@
* Released to the public domain.
*
*--------------------------------------------------------------------------
* $Id: hashtable.h,v 9999.16 2018/05/05 00:47:52 cvs Exp $
* $Id: hashtable.h,v 9999.22 2019/01/31 03:15:09 cvs Exp $
\*--------------------------------------------------------------------------*/
#ifndef _HASHTABLE_H
......@@ -16,7 +16,7 @@
* All access should be via the public functions declared below. */
typedef struct KeyValuePair_struct {
const void *key;
void *key;
void *value;
struct KeyValuePair_struct *next;
} KeyValuePair;
......@@ -52,11 +52,31 @@ ArrayList * ArrayListCreate(int init_capacity);
ArrayList * ArrayListDuplicate(ArrayList * ori);
void ArrayListDestroy(ArrayList * list);
void * ArrayListGet(ArrayList * list, long n);
void * ArrayListRandom(ArrayList * list);
int ArrayListPush(ArrayList * list, void * new_elem);
int ArrayListPush_NoRepeatedPtr(ArrayList * list, void * new_elem);
// Shift is the other direction of Pop:
// Say, a list has elements 0,1,2,3,4
// Pop removes 4 from the list and returns it.
// Shift removes 0 from the list and returns it.
//
// Caller has to be sure that at least one element is in the list or a NULL is returned.
// Due to its nature, the caller has to deallocate the returned object. The internal deallocater is NOT called on the returned object.
void * ArrayListShift(ArrayList * list);
void * ArrayListPop(ArrayList * list);
void ArrayListSetDeallocationFunction(ArrayList * list, void (*elem_deallocator)(void *elem));
void ArrayListSort(ArrayList * list, int compare_L_minus_R(void * L_elem, void * R_elem));
// A simple comparison function if you want to sort unsigned long long ints.
int ArrayListLLUComparison(void * L_elem, void * R_elem);
// This function returns the index of the value in the list that is the minimum value greater than "value_less_than_dent".
// The elements in the list are all zero-based unsigned long long ints. The list must be sorted (small -> large) before calling this function.
// If the "value_less_than_dent" is larger than all the values in the list, "-1" is returned;
// If there are repeated numbers in the list, this function will always return the index of the first of them when this repeated number is the next dent.
//
long ArrayListFindNextDent(ArrayList * list, unsigned long long value_less_than_dent);
void HashTableIteration(HashTable * tab, void process_item(void * key, void * hashed_obj, HashTable * tab) );
ArrayList * HashTableKeyArray(HashTable * tab);
......@@ -187,7 +207,8 @@ int HashTablePutReplace(HashTable *hashTable, const void *key, void *value, int
* doesn't exist in the HashTable
\*--------------------------------------------------------------------------*/
void *HashTableGet(const HashTable *hashTable, const void *key);
void * HashTableGetKey(const HashTable *hashTable, const void *key);
void * HashTableGet(const HashTable *hashTable, const void *key);
/*--------------------------------------------------------------------------*\
* NAME:
......
This diff is collapsed.
This diff is collapsed.
......@@ -30,6 +30,7 @@
#define GENE_INPUT_FASTQ 1
#define GENE_INPUT_FASTA 2
#define GENE_INPUT_GZIP_FASTQ 51
#define GENE_INPUT_GZIP_FASTA 52
#define GENE_INPUT_SAM_SINGLE 93
#define GENE_INPUT_SAM_PAIR_1 94
......@@ -260,7 +261,7 @@ int parse_SAM_line(char * sam_line, char * read_name, int * flags, char * chro,
int find_subread_end(int len, int TOTAL_SUBREADS,int subread) ;
int break_SAM_file(char * in_SAM_file, int is_BAM, char * temp_file_prefix, unsigned int * real_read_count, int * block_no, chromosome_t * known_chromosomes, int is_sequence_needed, int base_ignored_head_tail, gene_value_index_t *array_index, gene_offset_t * offsets, unsigned long long int * all_Mapped_bases , HashTable * event_table_ptr, char * VCF_file);
int break_SAM_file(char * in_SAM_file, int is_BAM, char * temp_file_prefix, unsigned int * real_read_count, int * block_no, chromosome_t * known_chromosomes, int is_sequence_needed, int base_ignored_head_tail, gene_value_index_t *array_index, gene_offset_t * offsets, unsigned long long int * all_Mapped_bases , HashTable * event_table_ptr, char * VCF_file, unsigned long long * all_mapped_reads, int do_fragment_filtering, int push_to_read_head);
int get_known_chromosomes(char * in_SAM_file, chromosome_t * known_chromosomes);
......@@ -275,7 +276,7 @@ double guess_reads_density_format(char * fname, int is_sam, int * min_phred, int
FILE * get_temp_file_pointer(char *temp_file_name, HashTable* fp_table, int * close_immediately);
int write_read_block_file(FILE *temp_fp , unsigned int read_number, char *read_name, int flags, char * chro, unsigned int pos, char *cigar, int mapping_quality, char *sequence , char *quality_string, int rl , int is_sequence_needed, char strand, unsigned short read_pos, unsigned short read_len);
int write_read_block_file(FILE *temp_fp , unsigned int read_number, char *read_name, int flags, char * chro, unsigned int pos, char *cigar, int mapping_quality, char *sequence , char *quality_string, int rl , int is_sequence_needed, char strand, unsigned short read_pos, unsigned short read_len, unsigned short M_seg);
int get_read_block(char *chro, unsigned int pos, char *temp_file_suffix, chromosome_t *known_chromosomes, unsigned int * max_base_position);
int my_strcmp(const void * s1, const void * s2);
......
......@@ -29,7 +29,7 @@
#define LRMDYNAMIC_MAXIMUM_GAP_LENGTH (15000)
// " * 250" is for RNA-seq mapping -- a much larger dynamic programming space.
#define LRMINDEL_DYNAMIC_CHANNEL_TOLERANCE (150 * 250 )
#define LRMINDEL_DYNAMIC_CHANNEL_TOLERANCE (150 * 250 / 250 )
#define LRMSUBREAD_INDEX_OPTION_INDEX_GAP 0x0101
......
SUBREAD_VERSION_BASE=1.6.3
SUBREAD_VERSION_BASE=1.6.4
SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y")
SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)"
SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)"
......
......@@ -120,6 +120,8 @@ void set_hash_funcs(HashTable * tab, int is_key_freed, int is_value_string)
HashTableSetKeyComparisonFunction(tab,fc_strcmp_chro);
}
int warning_reported_repeated;
void do_find_common(char ** file_names, int files)
{
int badfiles = 0;
......@@ -194,6 +196,7 @@ void do_find_common(char ** file_names, int files)
char *tmp_pnt_alt = NULL;
//SUBREADprintf("GO_INTO_STR: %s\n", alt_str);
while(1)
{
char * alt_one = strtok_r(tmp_pnt_alt?NULL:alt_str,",", &tmp_pnt_alt);
......@@ -223,7 +226,8 @@ void do_find_common(char ** file_names, int files)
}
else
{
SUBREADprintf("Warning: repeated rows are found in the first input file!\n");
if(!warning_reported_repeated)SUBREADprintf("Warning: repeated rows are found in the first input file!\n");
warning_reported_repeated=1;
free(ky);
}
}
......@@ -249,6 +253,7 @@ void do_find_common(char ** file_names, int files)
}
free(ky);
}
if(NULL == tmp_pnt_alt) break;
}
}
......@@ -365,6 +370,7 @@ int findCommonVariants(int argc, char ** argv)
opterr = 1;
optopt = 63;
output_file_name[0]=0;
warning_reported_repeated=0;
while((c = getopt_long (argc, argv, "o:h", propm_long_options, &option_index)) != -1)
{
......
......@@ -288,7 +288,7 @@ int init_PE_sambam(propMapped_context * context)
char mac_rand[13];
mac_or_rand_str(mac_rand);
#ifdef MAKE_STANDALONE
srand(time(NULL));
myrand_srand(time(NULL));
#endif
int x1;
......
This diff is collapsed.
This diff is collapsed.
......@@ -31,6 +31,6 @@ typedef struct{
// Giving read_count '0' makes the function use its default value: read_count = 400,000,000, namely ~50MB memory is used to store the selection table (other parts of the program may use more memory)
// Note that this function generates ( 4 * all_reads ) bytes of temporary files in the current directory or the directory specified in temp_location.
int repeated_read_removal(char * in_SAM_file, int threshold, char * out_SAM_file, char * temp_location, unsigned int read_count, int threads);
int repeated_read_removal(char * in_SAM_file, int threshold, char * out_SAM_file, char * temp_location, int threads);
#endif
......@@ -151,6 +151,7 @@ SamBam_FILE * SamBam_fopen(char * fname , int file_type)
if(first_ch!=31 || second_ch!=139)