Skip to content
Commits on Source (8)
File added
[submodule "thirdparty/gatb-core"]
path = thirdparty/gatb-core
url = https://github.com/GATB/gatb-core.git
# cmake_minimum_required() has to run before project(): it establishes the
# policy defaults that project() and everything after it rely on.
cmake_minimum_required(VERSION 2.6)
project(DiscoSNP++)
################################################################################
# The version number.
################################################################################
# Version components default to 2.2.10. Each one is replaced by the matching
# MAJOR / MINOR / PATCH variable when that variable is defined.
set(gatb-tool_VERSION_MAJOR 2)
if(DEFINED MAJOR)
    set(gatb-tool_VERSION_MAJOR ${MAJOR})
endif()

set(gatb-tool_VERSION_MINOR 2)
if(DEFINED MINOR)
    set(gatb-tool_VERSION_MINOR ${MINOR})
endif()

set(gatb-tool_VERSION_PATCH 10)
if(DEFINED PATCH)
    set(gatb-tool_VERSION_PATCH ${PATCH})
endif()

# Dotted "major.minor.patch" string built from the three components.
set(gatb-tool-version ${gatb-tool_VERSION_MAJOR}.${gatb-tool_VERSION_MINOR}.${gatb-tool_VERSION_PATCH})

# A tag supplied by continuous integration (JENKINS_TAG) takes priority over
# the locally computed version string.
if(DEFINED JENKINS_TAG)
    set(gatb-tool-version ${JENKINS_TAG})
endif()
################################################################################
# Define cmake modules directory
################################################################################
# Location of the GATB-Core sources inside the thirdparty/ directory.
set(GATB_CORE_HOME "${PROJECT_SOURCE_DIR}/thirdparty/gatb-core/gatb-core")
# CMake modules are looked up in the cmake/ directory under GATB_CORE_HOME.
set(CMAKE_MODULE_PATH "${GATB_CORE_HOME}/cmake")
################################################################################
# THIRD PARTIES
################################################################################
# We don't want to install some GATB-CORE artifacts
# Flags meant to keep the GATB-Core examples and tests out of this project
# (presumably read by the GatbCore module -- confirm there).
set(GATB_CORE_EXCLUDE_EXAMPLES 1)
set(GATB_CORE_EXCLUDE_TESTS 1)

# Load the GatbCore module, found through CMAKE_MODULE_PATH.
include(GatbCore)
################################################################################
# TOOLS GENERATION
################################################################################
# Executables are written to <current binary dir>/bin. PROJECT_BINARY_DIR is
# pointed at that same directory before the tools/ subdirectory is processed.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
set(PROJECT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/bin")

add_subdirectory(tools)
################################################################################
# PACKAGING
################################################################################
# Package identity.
set(CPACK_PACKAGE_VENDOR "Genscale team (INRIA)")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "gatb-tool ${PROJECT_NAME}")

# Package version: full string first, then its individual components.
set(CPACK_PACKAGE_VERSION "${gatb-tool-version}")
set(CPACK_PACKAGE_VERSION_MAJOR "${gatb-tool_VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${gatb-tool_VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${gatb-tool_VERSION_PATCH}")

# Binary and source packages are both generated with the TGZ generator.
set(CPACK_GENERATOR "TGZ")
set(CPACK_SOURCE_GENERATOR "TGZ")
# We ignore unwanted files for the source archive
# Patterns for paths left out of the source archive: version-control
# metadata, the local build/ directory and IDE project files.
set(CPACK_SOURCE_IGNORE_FILES
    "^${PROJECT_SOURCE_DIR}/\\.git/"
    "^${PROJECT_SOURCE_DIR}/\\.gitmodules"
    "^${PROJECT_SOURCE_DIR}/\\.gitignore"
    "^${PROJECT_SOURCE_DIR}/build/"
    "^${GATB_CORE_HOME}/\\.cproject"
    "^${GATB_CORE_HOME}/\\.git/"
    "^${GATB_CORE_HOME}/\\.project"
    "^${GATB_CORE_HOME}/\\.gitignore"
)
# For creating the BINARY package we include the files we want
# Content of the binary package, all installed at the package root.

# Documentation and test data are copied as whole directories.
install(
    DIRECTORY
        ${CMAKE_CURRENT_SOURCE_DIR}/doc
        ${CMAKE_CURRENT_SOURCE_DIR}/test
    DESTINATION .
)

# From scripts/, only *.py and *.sh files are kept; anything matching
# "jenkins" is excluded.
install(
    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts
    DESTINATION .
    FILES_MATCHING REGEX ".*\\.(py|sh)$" PATTERN "jenkins" EXCLUDE
)

# Top-level launcher script, README and license.
install(
    FILES
        ${CMAKE_CURRENT_SOURCE_DIR}/run_discoSnp++.sh
        ${CMAKE_CURRENT_SOURCE_DIR}/README.md
        ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE
    DESTINATION .
)
# We include the "bin" tag into binary archive file name
# Binary archive name: <project>-<version>-bin-<system name>.
set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${CPACK_PACKAGE_VERSION}-bin-${CMAKE_SYSTEM_NAME}")

# Must stay last: the CPack module is included once every CPACK_* variable
# has been set.
include(CPack)
# CMake is required to compile software (http://www.cmake.org/cmake/resources/software.html)
#
# You can install software by executing this file: sh INSTALL
#
# Prepare GATB sub-module.
# These two commands are deliberately not guarded: they are allowed to fail
# (presumably when the sources do not come from a git checkout -- confirm)
# without stopping the installation.
git submodule init
git submodule update

# Prepare directories: always start from an empty 'build' directory
rm -rf build
mkdir build || exit 1

# Go in the 'build' directory; abort rather than configure in the wrong place
cd build || exit 1

# Prepare the makefile
cmake .. || exit 1

# Run the newly created makefile; do not go on to the tests if the build failed
make -j8 || exit 1

# Go back at the installation root directory
cd ..

# run tests
echo "Running simple test..."
cd test || exit 1
# The test script is sourced, so it runs in this shell.
. ./simple_test.sh
cd ..
This diff is collapsed.
# DiscoSnp++ and DiscoSnpRad
| **Linux** | **Mac OSX** |
|-----------|-------------|
[![Build Status](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-debian7-64bits-gcc-4.7/badge/icon)](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-debian7-64bits-gcc-4.7/) | [![Build Status](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-macos-10.9.5-gcc-4.2.1/badge/icon)](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-macos-10.9.5-gcc-4.2.1/)
[![License](http://img.shields.io/:license-affero-blue.svg)](http://www.gnu.org/licenses/agpl-3.0.en.html)
# What is DiscoSnp++?
DiscoSnp is designed for discovering all kinds of SNPs (not only isolated ones), as well as insertions and deletions, from raw set(s) of reads. The number of input read sets is not constrained, it can be one, two, or more. No reference genome is needed.
## Publications
Uricaru R., Rizk G., Lacroix V., Quillery E., Plantard O., Chikhi R., Lemaitre C., Peterlongo P. (2014). [Reference-free detection of isolated SNPs](http://nar.oxfordjournals.org/content/43/2/e11). Nucleic Acids Research 43(2):e11.
Peterlongo, P., Riou, C., Drezen, E., Lemaitre, C. (2017). [DiscoSnp ++ : de novo detection of small variants from raw unassembled read set(s).](https://doi.org/10.1101/209965) BioRxiv.
## DiscoSnp++ or DiscoSnpRad
We propose a DiscoSnp++ adaptation for RAD-Seq data. A script, called `run_discoSnpRad.sh`, is adapted to this kind of data. See below for more details.
# Getting the latest source code
## Requirements
CMake 2.6+; see [http://www.cmake.org/cmake/resources/software.html](http://www.cmake.org/cmake/resources/software.html)
c++ compiler; compilation was tested with gcc and g++ version>=4.5 (Linux) and clang version>=4.1 (Mac OSX).
## Instructions
# get a local copy of DiscoSnp source code
git clone --recursive https://github.com/GATB/DiscoSnp.git
# compile the code and run a simple test on your computer
cd DiscoSnp
sh INSTALL
# Getting a binary stable release
Binary releases for Linux and Mac OSX are provided within the "Releases" tab on Github/DiscoSnp web page.
After downloading and extracting the content of the binary archive, please run the following command from DiscoSnp home directory:
chmod +x run_discoSnp++.sh test/*.sh scripts/*.sh
# Quick start
Run DiscoSnp WITHOUT mapping results on a reference genome:
./run_discoSnp++.sh -r test/fof.txt -T
Run DiscoSnp WITH mapping results on a reference genome (requires bwa):
./run_discoSnp++.sh -r test/fof.txt -T -G test/reference_genome.fa
Note: if bwa is not in your PATH, then add the option "-B path_to_bwa". For instance:
./run_discoSnp++.sh -r test/fof.txt -T -G test/reference_genome.fa -B /home/me/my_programs/bwa-0.7.12/
Run DiscoSnp WITH mapping results on a reference genome AND using this reference genome for calling variants:
./run_discoSnp++.sh -r test/fof.txt -T -G test/reference_genome.fa -R
# User manual
See doc/discoSnp_user_guide.pdf or doc/discoSnp_user_guide.txt
# DiscoSnpRad
While dealing with RAD-Seq data, `run_discoSnpRad.sh` script should be used. It uses options specific to RAD-Seq: branching strategy, kind of extensions, abundance threshold, and kind of bubbles to be found. Moreover, it clusters variants per locus by calling the `discoRAD_finalization.sh` pipeline. Cluster information is reported in the final provided VCF file.
A README file describes all scripts and the `discoRAD_finalization.sh` pipeline.
# Contact
Remarks and questions: [https://www.biostars.org/t/discosnp/](https://www.biostars.org/t/discosnp/)
Contact: Pierre Peterlongo: [pierre.peterlongo@inria.fr](mailto:pierre.peterlongo@inria.fr)
discoSnp
Reference-free detection of isolated SNPs
v1.2.3
User's guide – July 2014
contact: pierre.peterlongo@inria.fr
Table of contents
CeCILL License 1
Publication 1
discoSnp features at a glance 2
Quick starting 2
Components 2
Download and install 2
Running discoSnp 3
Output 3
Extensions: differences between unitig and contigs (from version 2.1.1.3) 5
Output Analyze 5
CeCILL License
Copyright INRIA
This software is a computer program whose purpose is to find all the similar reads between two sets of NGS reads. It also provides a similarity score between the two samples.
This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software. You can use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".
As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability.
In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security.
The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
Publication
Publication is in preparation
discoSnp features at a glance
Software discoSnp is designed for extracting isolated Single Nucleotide Polymorphism (SNP) from raw set(s) of reads obtained with Next Generation Sequencers (NGS). Isolated means far away from any source of polymorphism, and far away means at least k bp, with k a user defined parameter.
Note that this tool is specially designed to use only a limited amount of memory (3 billion reads of size 100 can be treated with less than 6 GB of memory).
The software is composed of two independent modules. The first module, kissnp2, detects isolated SNPs from read sets. The second module, kissreads, enhances the kissnp2 results by computing per read set and for each found SNP i/ its mean read coverage and ii/ the average (phred) quality of reads generating the polymorphism.
Quick starting
After compiling programs (./compile_kissnp_kissreads.sh) the main script can be called as follows:
./run_discoSnp.sh -r "data_sample/reads_sequence1.fasta data_sample/reads_sequence2.fasta.gz"
This creates a fasta file called discoRes_k_31_c_4_coherent.fa containing the found SNPs.
Components
Licence and readme
clear_kissnp_kissreads.sh : cleaning the compiled tools. Useful when changing the k value
compile_kissnp_kissreads.sh : compile all binaries and put them in a ''tools'' directory
data_sample : directory containing two toy example read sets
kissnp2 : sources of the SNP detection module
kissreads : sources of the read coverage and quality module
minia : sources of the k-mer counting and data structure
output_analyses : directory containing third party tools for post processing discoSnp results.
run_discoSnp.sh : main (hopefully user friendly) script running both modules for searching SNPs from raw read sets and for counting their read coverage and quality
Download and install
Download from discoSnp web page – http://colibread.inria.fr/discosnp/ . Please read and accept the CeCILL license before downloading.
Unzip the downloaded package :
# unzip discoSnp_versionnumber.zip
Get into the newly created discoSnp directory:
# cd discoSnp
Compile the two modules (kissnp and kissreads) with the single command:
# ./compile_kissnp_kissreads.sh k_value
Note about the k value:
Replace k_value by the value of the kmer that you wish to use.
No need to recompile for any values below or equal to 31
No need to recompile for any values in [32,63]
No need to recompile for any values in [64,127]
...
By default, k=31 (value used both for compiling and in runtime).
If you wish to recompile (for instance for changing the k value from any value below or equal to 31 to any value bigger or equal to 32) first clear the previously compiled executables:
# ./clear_kissnp_kissreads.sh
This installation process should generate the “tools” directory containing the two executables: kissnp2 and kissreads.
Running discoSnp
The main script run_discoSnp.sh automatically runs the two modules (SNP detection and read coverage and quality computations). You will provide the following information:
-r (read_sets) “readref.fasta readsnp.fastq.gz”: localization of the read files. Note that these files may be in fastq, or fasta, gzipped or not. If there are more than one read file, then they must be surrounded by the " character and they are separated by space.
This is the only mandatory parameter.
-p prefix_name: the names of all temporary and final files will start with this prefix
default: “discoRes”
-k kmer_size: size of kmers (default: 31)
-b branching_strategy: branching filtering approach. This parameter influences the precision/recall trade-off.
0: SNPs for which any of the two paths is branching are discarded (high precision, lowers the recall in complex genomes). Default value
1: (smart branching) forbid SNPs for which the two paths are branching (e.g. the two paths can be created either with a 'A' or a 'C' at the same position)
2: No limitation on branching (lowers the precision, high recall)
-c minimal_coverage: minimal kmer coverage
default 4
-d error_threshold: max number of errors per read (used by kissreads only).
default 1
-g genome_size: estimated genome size. Used only to control kissnp2 memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM.
Default 10000000 (uses a few MB of memory)
-w: If this option is called the waste files are not removed.
-h: show help.
By default binaries are searched in the current “tools” directory. You may change this default value by editing the run_discoSnp.sh script and changing the PATH_RS line.
Additionally you may change some kissnp2 / kissreads options. In this case you may change the two corresponding lines in the run_discoSnp.sh file. To know the possible options, type ./tools/kissnp2 and/or ./tools/kissreads without options. Note that usually, changing these options is not necessary.
Sample example:
You can test discoSnp on a toy example containing 3 SNPs:
./run_discoSnp.sh -r "data_sample/reads_sequence1.fasta data_sample/reads_sequence2.fasta.gz"
Output
Final results are in prefix_coherent_k_kval_c_cval.fa file. This is a simple fasta file composed of a succession of pairs of sequences. Each pair corresponds to a SNP. Let's look at an example :
>SNP_higher_path_2|high|left_unitig_length_472|right_unitig_length_261|left_contig_length_472|right_contig_length_378|C1_8|C2_120|rank_0.88900
ttgcggataccgttgagacatcttataagtagacgcaatgcggaatcttatagaatcgcccgatagcgttgtgttggtggacacggctgattaccctctcacccgcgctattagcttccataccacctgcggccatccattaagatccgctgctcctcacgaaaaaagaattaataagaagtcccgtaacatgcggatttggtagtcgttatagacaactttactggggcgaactaaaacgcttgtggacagaattttggcagtggcaattaatctctaatgatgtgatattagggtctaaaatgtaagaattcggtgagttagattggacaaggggatccgaagatgttttggcgcagttagtcacagggggagcccctgcctacaaaaagcgcttactgttgactgtctagggatacagcgaaagcggcagtcgttgaagcaaaagtgatatgtgcgacactgcatctagGCAGCGCAACAACGCAACAGCTCGAGGTGTACTTCGCAGAGAAACCGCACGTCCAGTTCTAacactctcatatgtgctcgtcgtttatgctttcggcgtgaaaactggtgcgccggtgtctggagaccatccttcttgcgtatgactccaaggacagccatcacggtttgtgggttcactgggactgtcacgcttaaccggacggaactcgagaaggcatacgactggtcgtaagaccgctctgatccgacaccaccataacgcggcactcatgattatcatcacttttttagtccctattacagagctgccgggtggatgactctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtactaatagggccgggctacatgttaactac
>SNP_lower_path_2|high|left_unitig_length_472|right_unitig_length_261|left_contig_length_472|right_contig_length_378|C1_118|C2_6|rank_0.88900
ttgcggataccgttgagacatcttataagtagacgcaatgcggaatcttatagaatcgcccgatagcgttgtgttggtggacacggctgattaccctctcacccgcgctattagcttccataccacctgcggccatccattaagatccgctgctcctcacgaaaaaagaattaataagaagtcccgtaacatgcggatttggtagtcgttatagacaactttactggggcgaactaaaacgcttgtggacagaattttggcagtggcaattaatctctaatgatgtgatattagggtctaaaatgtaagaattcggtgagttagattggacaaggggatccgaagatgttttggcgcagttagtcacagggggagcccctgcctacaaaaagcgcttactgttgactgtctagggatacagcgaaagcggcagtcgttgaagcaaaagtgatatgtgcgacactgcatctagGCAGCGCAACAACGCAACAGCTCGAGGTGTTCTTCGCAGAGAAACCGCACGTCCAGTTCTAacactctcatatgtgctcgtcgtttatgctttcggcgtgaaaactggtgcgccggtgtctggagaccatccttcttgcgtatgactccaaggacagccatcacggtttgtgggttcactgggactgtcacgcttaaccggacggaactcgagaaggcatacgactggtcgtaagaccgctctgatccgacaccaccataacgcggcactcatgattatcatcacttttttagtccctattacagagctgccgggtggatgactctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtactaatagggccgggctacatgttaactac
In this example a SNP A/T is found (underlined here). The central sequence of length 2k-1 (here 2*31-1=61) is seen in upper case, while the two (left and right) extensions are seen in lower case.
The comments are formatted as follows:
>SNP_higher/lower_path_id|high/low|left_unitig_length_int|right_unitig_length_int|left_contig_length_int|right_contig_length_int|C1_int|C2_int|[Q1_int|Q2_int|]rank_float
higher/lower: one of the two alleles
id: id of the SNP: each SNP (couple of sequences) has a unique id, here 2.
high/low: sequence complexity. If the sequence is of low complexity (e.g. ATATATATATATATAT) this variable would be low
left_unitig_length: size of the full left extension. Here 472
right_unitig_length: size of the right extension. Here 261
left_contig_length: size of the full left extension. Here 472
right_contig_length: size of the right extension. Here 378
C1: number of reads mapping the central upper case sequence from the first read set
C2: number of reads mapping the central upper case sequence from the second read set
C3 [if input data were at least 3 read sets]: number of reads mapping the central upper case sequence from the third read set
C4, C5, ...
Q1 [if reads were given in fastq]: average phred quality of the central nucleotide (here A or T) from the mapped reads from the first read set.
Q2 [if reads were given in fastq]: average phred quality of the central nucleotide (here A or T) from the mapped reads from the second read set.
Q3 [if the data were at least 3 fastq read sets]: average phred quality of the central nucleotide (here A or T) from the mapped reads from the third read set.
Q4, Q5, …
rank: ranks the predictions according to their read coverage in each condition favoring SNPs that are discriminant between conditions (Phi coefficient) (see publication)
Extensions: differences between unitig and contigs (from version 2.1.1.3)
By default in the pipeline, the found SNPs (of length 2k-1) are extended using a contiger. The output contains such contigs and their lengths are shown in the header (left_contig_length and right_contig_length). Moreover, a contig may hide some small polymorphism such as substitutions and/or indels. The output also proposes the length of the longest extension not containing any such polymorphism. These extensions are called unitigs (defined as « A uniquely assemblable subset of overlapping fragments »).
Output Analyze
From a fasta format to a csv format: If you wish to analyze the results in a tabulated format:
# python output_analyses/discoSnp_to_csv.py discoSnp_output.fa
will output a .csv tabulated file containing on each line the content of 4 lines of the .fa, replacing the '|' character by spaces and removing the CX_
example with previously used SNP example:
>SNP_higher_path_3 high left_contig_length_86 right_contig_length_52 78 5 rank_0.89839 tctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtACTAATAGGGCCGGGCTACATGTTAACTACAAGGCTATAACCTATTGATGACCCGGTCCATacataacttggtatcgtgcatgtagcgttcaagggctatagcaattccgacg >SNP_lower_path_3 high left_contig_length_86 right_contig_length_52 4 91 rank_0.89839 tctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtACTAATAGGGCCGGGCTACATGTTAACTACTAGGCTATAACCTATTGATGACCCGGTCCATacataacttggtatcgtgcatgtagcgttcaagggctatagcaattccgacg
Genotyping the results: If you wish to genotype your results:
#python output_analyses/discoSnp_to_genotypes.py discoSnp_output.fa threshold_value
will output a file containing on each line the “genotypes” of a SNP. For each input data set it indicates if the SNP is:
heterozygous ALT1 path (coverage ALT1 >= threshold and ALT2 < threshold): 1
heterozygous ALT2 path (coverage ALT1 < threshold and ALT2 >= threshold): -1
homozygous (coverage ALT1 >= threshold and ALT2 >= threshold): 2
absent (coverage ALT1 < threshold and ALT2 < threshold): 0
then it outputs the central sequence of length 2k-1 replacing the central position by ALT1/ALT2
example with previously used SNP example and threshold 20:
GENOTYPES_SNP_3_THRESHOLD_20 1 -1 TACTAATAGGGCCGGGCTACATGTTAACTACA/TAGGCTATAACCTATTGATGACCCGGTCCATA
#Copyright inria / irisa (2013)
#
#
#raluca.uricaru@gmail.com
#pierre.peterlongo@inria.fr
#
#This software is a computer program whose purpose is to call SNPs from NGS reads.
#
#This software is governed by the CeCILL license under French law and
#abiding by the rules of distribution of free software. You can use,
#modify and/ or redistribute the software under the terms of the CeCILL
#license as circulated by CEA, CNRS and INRIA at the following URL
#"http://www.cecill.info".
#
#As a counterpart to the access to the source code and rights to copy,
#modify and redistribute granted by the license, users are provided only
#with a limited warranty and the software's author, the holder of the
#economic rights, and the successive licensors have only limited
#liability.
#
#In this respect, the user's attention is drawn to the risks associated
#with loading, using, modifying and/or developing or reproducing the
#software by the user in light of its specific status of free software,
#that may mean that it is complicated to manipulate, and that also
#therefore means that it is reserved for developers and experienced
#professionals having in-depth computer knowledge. Users are therefore
#encouraged to load and test the software's suitability as regards their
#requirements in conditions enabling the security of their systems and/or
#data to be ensured and, more generally, to use and operate it in the
#same conditions as regards security.
#
#The fact that you are presently reading this means that you have had
#knowledge of the CeCILL license and that you accept its terms.
# Remove the build products of both modules and the copied executables.
#
# Each 'make clean' runs in its own subshell and only after a successful 'cd'.
# The working directory of this script therefore never changes, so a missing
# module directory can no longer make 'make clean' or the 'rm -f' commands
# below act relative to the wrong directory.
( cd kissnp2/ && make clean )
( cd kissreads/ && make clean )

# Remove the executables copied into ./tools by the compile script.
rm -f ./tools/kissreads
rm -f ./tools/kissnp2
#Copyright inria / irisa (2013)
#
#
#raluca.uricaru@gmail.com
#pierre.peterlongo@inria.fr
#
#This software is a computer program whose purpose is to call SNPs from NGS reads.
#
#This software is governed by the CeCILL license under French law and
#abiding by the rules of distribution of free software. You can use,
#modify and/ or redistribute the software under the terms of the CeCILL
#license as circulated by CEA, CNRS and INRIA at the following URL
#"http://www.cecill.info".
#
#As a counterpart to the access to the source code and rights to copy,
#modify and redistribute granted by the license, users are provided only
#with a limited warranty and the software's author, the holder of the
#economic rights, and the successive licensors have only limited
#liability.
#
#In this respect, the user's attention is drawn to the risks associated
#with loading, using, modifying and/or developing or reproducing the
#software by the user in light of its specific status of free software,
#that may mean that it is complicated to manipulate, and that also
#therefore means that it is reserved for developers and experienced
#professionals having in-depth computer knowledge. Users are therefore
#encouraged to load and test the software's suitability as regards their
#requirements in conditions enabling the security of their systems and/or
#data to be ensured and, more generally, to use and operate it in the
#same conditions as regards security.
#
#The fact that you are presently reading this means that you have had
#knowledge of the CeCILL license and that you accept its terms.
# Usage: <this script> [k_value]
#
# Builds kissnp2 and kissreads with the given k value and copies both
# executables into ./tools. Must be run from the directory that contains
# kissnp2/ and kissreads/. Exits with a non-zero status when a build fails.

# k value: first argument, or 29 when the argument is missing or empty.
k=${1:-29}

# -p: an existing ./tools directory from a previous build is not an error.
mkdir -p tools

# Build kissnp2. The build runs in a subshell so a failed 'cd' cannot leave
# this script in the wrong directory; the copy only happens after a
# successful 'make', so a stale binary cannot hide a failed build.
( cd kissnp2 && make k="$k" ) && cp kissnp2/kissnp2 tools
if [ $? -ne 0 ]
then
    echo "there was a problem compiling kissnp2, sorry. Please fix the problem or contact pierre.peterlongo@inria.fr."
    exit 1
fi

# Build kissreads. MYFLAGS=-DOMP selects the parallel version; remove it to
# build the sequential version.
( cd kissreads && make MYFLAGS=-DOMP k="$k" ) && cp kissreads/kissreads tools
if [ $? -ne 0 ]
then
    echo "there was a problem compiling kissreads, sorry. Please fix the problem or contact pierre.peterlongo@inria.fr."
    exit 1
fi

# Post-install advice. printf is used because 'echo -e' is not portable
# across sh implementations.
echo "Compiling is done, if possible, :"
printf '\t 1/ copy executables "tools/kissnp2" and "tools/kissreads" in a directory member of the PATH environment variable (e.g. /usr/local/bin)\n'
printf '\t 2/ replace PATH_RS="./tools" by PATH_RS="" in the "run_discoSnp.sh" configuration file\n'

# Absolute path of the current directory, shown in the message below.
MY_PATH=$(pwd)
if [ -z "$MY_PATH" ] ; then
    # error; for some reason, the path is not accessible
    # to the script (e.g. permissions re-evaled after suid)
    exit 1 # fail
fi
echo " or"
echo "Leave it as is. In this case, if working outside this current directory (\"$MY_PATH\"), you will have to indicate in the \"run_discoSnp.sh\" where executables \"kissnp2\" and \"kissreads\" are located by changing the value of the PATH_RS variable"
This diff is collapsed.
discosnp (2.3.0-1) UNRELEASED; urgency=medium
* Point watch file to Github
* New upstream version
* debhelper 11
* Point Vcs fields to salsa.debian.org
* Standards-Version: 4.2.1
-- Andreas Tille <tille@debian.org> Sat, 20 Oct 2018 10:56:58 +0200
discosnp (1.2.6-2) unstable; urgency=medium
* Secure URI in watch file
......
......@@ -4,22 +4,19 @@ Uploaders: Olivier Sallou <osallou@debian.org>,
Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
bc,
zlib1g-dev,
help2man
Standards-Version: 4.1.1
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/discosnp.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/discosnp.git
Build-Depends: debhelper (>= 11~),
cmake,
libgatbcore-dev
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/discosnp
Vcs-Git: https://salsa.debian.org/med-team/discosnp.git
Homepage: http://colibread.inria.fr/discosnp/
Package: discosnp
Architecture: any
Depends: ${misc:Depends},
${shlibs:Depends},
zlib1g,
bc,
python
${python3:Depends}
Description: discovering Single Nucleotide Polymorphism from raw set(s) of reads
Software discoSnp is designed for discovering Single Nucleotide
Polymorphism (SNP) from raw set(s) of reads obtained with Next Generation
......
Subject: Add hardening
Description: makefiles do not import default CFLAGS,...
this patch add debian cflags for hardening
Forwarded: no
Author: Olivier Sallou <osallou@debian.org>
Last-Updated: 2014-02-01
--- a/kissnp2/makefile
+++ b/kissnp2/makefile
@@ -33,7 +33,10 @@
#knowledge of the CeCILL license and that you accept its terms.
CC=g++
-CFLAGS= -O4 -lz -DMINIA_IS_IN_PARENT_FOLDER
+CPPFLAGS:=$(shell dpkg-buildflags --get CPPFLAGS)
+CFLAGS = $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -O4 -lz -DMINIA_IS_IN_PARENT_FOLDER
+LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS)
+
#CFLAGS= -O4 -lz -DMINIA_IS_IN_PARENT_FOLDER -DDONTMARK
SRC=../minia/Pool.cpp ../minia/Bank.cpp ../minia/Bloom.cpp ../minia/Hash16.cpp ../minia/Terminator.cpp ../minia/Kmer.cpp ../minia/Traversal.cpp ../minia/LinearCounter.cpp ../minia/Set.cpp ../minia/Utils.cpp ../minia/SortingCount.cpp ../minia/Debloom.cpp ../minia/OAHash.cpp Kmer_for_kissnp2.cpp SNP.cpp filter.cpp IterativeExtensions.cpp commons.cpp
EXEC=kissnp2
@@ -41,7 +44,7 @@
all: $(EXEC)
ifeq ($(prof),1)
- CFLAGS=-O3 -pg -lz
+ CFLAGS= $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -O3 -pg -lz
endif
ifeq ($(deb),1)
CFLAGS+=-O0 -DASSERTS -g -lz
@@ -79,7 +82,7 @@
all: $(EXEC)
kissnp2: $(OBJ) kissnp2.cpp
- $(CC) -o $@ $^ $(CFLAGS)
+ $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
%.o: %.cpp %.h
$(CC) -o $@ -c $< $(CFLAGS)
--- a/minia/makefile
+++ b/minia/makefile
@@ -1,4 +1,7 @@
-CFLAGS+= -O4 -D_FILE_OFFSET_BITS=64 # needed to handle files > 2 GB on 32 bits systems
+#CFLAGS+= -O4 -D_FILE_OFFSET_BITS=64 # needed to handle files > 2 GB on 32 bits systems
+CPPFLAGS:=$(shell dpkg-buildflags --get CPPFLAGS)
+CFLAGS = $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -O4 -D_FILE_OFFSET_BITS=64
+LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS)
SRC=Pool.cpp Bank.cpp Bloom.cpp Hash16.cpp LargeInt.cpp Kmer.cpp Terminator.cpp Traversal.cpp LinearCounter.cpp Set.cpp Utils.cpp SortingCount.cpp Debloom.cpp OAHash.cpp
EXEC=minia
OBJ= $(SRC:.cpp=.o)
@@ -60,7 +63,7 @@
$(MAKE) $(EXEC)
minia: $(OBJ) Minia.cpp
- $(CXX) -o $@ $(OBJ) Minia.cpp $(CFLAGS) -lz
+ $(CXX) -o $@ $(OBJ) Minia.cpp $(CFLAGS) $(LDFLAGS) -lz
%.o: %.cpp %.h
$(CXX) -o $@ -c $< $(CFLAGS)
--- a/kissreads/Makefile
+++ b/kissreads/Makefile
@@ -20,10 +20,10 @@
# optimized flags
#CFLAGS=-Wall -O3 -lz -fopenmp -DOMP #openmp is incompatiblre with clang #@!§
-CFLAGS= -O3 -lz -fopenmp -DOMP #openmp is incompatiblre with clang #@!
+CFLAGS = $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -Wall -O3 -lz
CFLAGS+=$(MYFLAGS)
#LDFLAGS+=-Wall -lm -O3 -lz -DOMP -fopenmp#-fopenmp
-LDFLAGS+= -lm -O3 -lz -DOMP -fopenmp#-fopenmp
+LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS) -Wall -lm -O3 -lz
## debug flags
@@ -51,7 +51,7 @@
@echo "#################"
kissreads: $(OBJ)
- $(CC) -o $@ $^ $(LDFLAGS)
+ $(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
kissReads.o: $(INCLDIR)coherence_algorithm.h $(INCLDIR)couple.h $(INCLDIR)extension_algorithm.h $(INCLDIR)fragment_info.h $(INCLDIR)libchash.h $(INCLDIR)outputs.h $(INCLDIR)commons.h $(INCLDIR)extending_fragment.h $(INCLDIR)fragment_index.h $(INCLDIR)hash.h $(INCLDIR)list.h
......@@ -7,27 +7,4 @@ include /usr/share/dpkg/default.mk
%:
dh $@
override_dh_auto_build:
dh_auto_build
cd kissnp2 && make k=29
cd kissreads && make k=29
override_dh_install:
cp output_analyses/discoSnp_to_genotypes.py output_analyses/discoSnp_to_genotypes
cp output_analyses/discoSnp_to_csv.py output_analyses/discoSnp_to_csv
help2man --help-option=-h --no-discard-stderr --version-string=$(DEB_VERSION_UPSTREAM) kissnp2/kissnp2 > kissnp2.1
help2man --help-option=-h --no-discard-stderr --version-string=$(DEB_VERSION_UPSTREAM) kissreads/kissreads > kissreads.1
dh_install
override_dh_clean:
cd minia && make clean
cd kissnp2 && make clean
cd kissreads && make clean
rm -f output_analyses/discoSnp_to_genotypes
rm -f output_analyses/discoSnp_to_csv
rm -f kissnp2/kissnp2
rm -f kissreads/kissreads
rm -f kissreads.1
rm -f kissnp2.1
dh_clean
version=4
https://colibread.inria.fr/discosnp/ https://colibread.inria.fr/files/\d+/\d+/discoSnp_([.\d]+)\.zip
https://github.com/GATB/DiscoSnp/releases .*/archive/v?@ANY_VERSION@@ARCHIVE_EXT@