Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille · Andreas Tille
--- a/.DS_Store
+++ b/.DS_Store
--- a/.gitignore
+++ b/.gitignore
+/build
--- a/.gitmodules
+++ b/.gitmodules
+[submodule "thirdparty/gatb-core"]
+	path = thirdparty/gatb-core
+	url = https://github.com/GATB/gatb-core.git
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
+# cmake_minimum_required() has to come before project(): it establishes the
+# policy defaults that project() relies on when it configures the build.
+cmake_minimum_required(VERSION 2.6)
+
+project(DiscoSNP++)
+
+################################################################################
+# The version number.
+################################################################################
+# Major component: built-in default, replaced when MAJOR is defined.
+set(gatb-tool_VERSION_MAJOR 2)
+if(DEFINED MAJOR)
+    set(gatb-tool_VERSION_MAJOR ${MAJOR})
+endif()
+
+# Minor component: built-in default, replaced when MINOR is defined.
+set(gatb-tool_VERSION_MINOR 2)
+if(DEFINED MINOR)
+    set(gatb-tool_VERSION_MINOR ${MINOR})
+endif()
+
+# Patch component: built-in default, replaced when PATCH is defined.
+set(gatb-tool_VERSION_PATCH 10)
+if(DEFINED PATCH)
+    set(gatb-tool_VERSION_PATCH ${PATCH})
+endif()
+
+# Full dotted version string assembled from the three components.
+set(gatb-tool-version ${gatb-tool_VERSION_MAJOR}.${gatb-tool_VERSION_MINOR}.${gatb-tool_VERSION_PATCH})
+
+# A tag supplied by continuous integration takes precedence over the
+# locally assembled version string.
+if(DEFINED JENKINS_TAG)
+    set(gatb-tool-version ${JENKINS_TAG})
+endif()
+
+################################################################################
+# Define cmake modules directory
+################################################################################
+# Root of the GATB-CORE sources inside the thirdparty tree; CMake modules are
+# looked up in its cmake/ sub-directory.
+set(GATB_CORE_HOME ${PROJECT_SOURCE_DIR}/thirdparty/gatb-core/gatb-core)
+set(CMAKE_MODULE_PATH ${GATB_CORE_HOME}/cmake)
+
+################################################################################
+# THIRD PARTIES
+################################################################################
+
+# Leave the GATB-CORE examples and tests out
+set(GATB_CORE_EXCLUDE_EXAMPLES 1)
+set(GATB_CORE_EXCLUDE_TESTS    1)
+
+# Bring in GATB-CORE through its GatbCore module
+include(GatbCore)
+
+################################################################################
+#  TOOLS GENERATION
+################################################################################
+
+# Executables are emitted into the bin/ directory of the current build tree.
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
+
+# PROJECT_BINARY_DIR is pointed at that same bin/ directory before the tools
+# sub-directory is processed.
+set(PROJECT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin)
+add_subdirectory(tools)
+
+################################################################################
+#  PACKAGING
+################################################################################
+
+# Package identity
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "gatb-tool ${PROJECT_NAME}")
+set(CPACK_PACKAGE_VENDOR              "Genscale team (INRIA)")
+
+# Package version, taken from the version variables defined above
+set(CPACK_PACKAGE_VERSION       "${gatb-tool-version}")
+set(CPACK_PACKAGE_VERSION_MAJOR "${gatb-tool_VERSION_MAJOR}")
+set(CPACK_PACKAGE_VERSION_MINOR "${gatb-tool_VERSION_MINOR}")
+set(CPACK_PACKAGE_VERSION_PATCH "${gatb-tool_VERSION_PATCH}")
+
+# Both the binary and the source archives are gzipped tarballs
+set(CPACK_GENERATOR        "TGZ")
+set(CPACK_SOURCE_GENERATOR "TGZ")
+
+# Patterns for files that must stay out of the source archive: version
+# control metadata, the local build directory and IDE project files.
+set(CPACK_SOURCE_IGNORE_FILES
+    "^${PROJECT_SOURCE_DIR}/\\.git/"
+    "^${PROJECT_SOURCE_DIR}/\\.gitmodules"
+    "^${PROJECT_SOURCE_DIR}/\\.gitignore"
+    "^${PROJECT_SOURCE_DIR}/build/"
+    "^${GATB_CORE_HOME}/\\.cproject"
+    "^${GATB_CORE_HOME}/\\.git/"
+    "^${GATB_CORE_HOME}/\\.project"
+    "^${GATB_CORE_HOME}/\\.gitignore"
+)
+
+# Content of the BINARY package: documentation, test data, helper scripts,
+# the launcher script and the top-level README/LICENSE files.
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc                DESTINATION .)
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test               DESTINATION .)
+# Only Python and shell scripts are taken from scripts/; anything matching
+# "jenkins" is excluded.
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts            DESTINATION . FILES_MATCHING REGEX ".*\\.(py|sh)$"  PATTERN "jenkins" EXCLUDE)
+# The launcher is a shell script: install(PROGRAMS) installs it with execute
+# permission, whereas install(FILES) would install it non-executable.
+install(PROGRAMS  ${CMAKE_CURRENT_SOURCE_DIR}/run_discoSnp++.sh  DESTINATION .)
+install(FILES     ${CMAKE_CURRENT_SOURCE_DIR}/README.md          DESTINATION .)
+install(FILES     ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE            DESTINATION .)
+
+# Binary archive name: <project>-<version>-bin-<system>
+set(CPACK_PACKAGE_FILE_NAME ${PROJECT_NAME}-${CPACK_PACKAGE_VERSION}-bin-${CMAKE_SYSTEM_NAME})
+
+# CPack is included last, once every CPACK_* variable above has been set.
+include(CPack)
--- a/INSTALL
+++ b/INSTALL
+# CMake is required to compile software (http://www.cmake.org/cmake/resources/software.html)
+#
+# You can install software by executing this file: sh INSTALL
+#
+
+# Prepare GATB sub-module
+git submodule init
+git submodule update
+
+# Prepare directories:
+rm -rf build
+mkdir  build
+
+# Go in the 'build' directory; abort if it is not available, otherwise the
+# following commands would run in the wrong directory
+cd build || exit 1
+
+# Prepare the makefile; stop here if the configuration fails
+cmake .. || exit 1
+
+# Run the newly created makefile; stop here if the compilation fails
+make -j8 || exit 1
+
+# Go back at the installation root directory
+cd ..
+
+# run tests
+echo "Running simple test..."
+cd test || exit 1
+. ./simple_test.sh
+cd ..
--- a/LICENSE
+++ b/LICENSE
--- a/README.md
+++ b/README.md
+# DiscoSnp++ and DiscoSnpRad
+
+
+| **Linux** | **Mac OSX** |
+|-----------|-------------|
+[![Build Status](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-debian7-64bits-gcc-4.7/badge/icon)](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-debian7-64bits-gcc-4.7/) | [![Build Status](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-macos-10.9.5-gcc-4.2.1/badge/icon)](https://ci.inria.fr/gatb-core/view/DiscoSnp/job/tool-discosnp-build-macos-10.9.5-gcc-4.2.1/)
+
+[![License](http://img.shields.io/:license-affero-blue.svg)](http://www.gnu.org/licenses/agpl-3.0.en.html)
+
+# What is DiscoSnp++?
+
+DiscoSnp is designed for discovering all kinds of SNPs (not only isolated ones),  as well as insertions and deletions, from raw set(s) of reads. The number of input read sets is not constrained, it can be one, two, or more. No reference genome is needed.
+
+## Publications
+
+Uricaru R., Rizk G., Lacroix V., Quillery E., Plantard O., Chikhi R., Lemaitre C., Peterlongo P. (2014). [Reference-free detection of isolated SNPs](http://nar.oxfordjournals.org/content/43/2/e11). Nucleic Acids Research 43(2):e11.
+
+Peterlongo, P., Riou, C., Drezen, E., Lemaitre, C. (2017). [DiscoSnp++: de novo detection of small variants from raw unassembled read set(s).](https://doi.org/10.1101/209965) BioRxiv.
+
+## DiscoSnp++ or DiscoSnpRad
+We propose a DiscoSnp++ adaptation for RAD-Seq data. A script, called `run_discoSnpRad.sh`, is adapted to this kind of data. See below for more details.
+
+# Getting the latest source code
+
+## Requirements
+
+CMake 2.6+; see [http://www.cmake.org/cmake/resources/software.html](http://www.cmake.org/cmake/resources/software.html)
+
+c++ compiler; compilation was tested with gcc and g++ version>=4.5 (Linux) and clang version>=4.1 (Mac OSX).
+
+## Instructions
+
+    # get a local copy of DiscoSnp source code
+    git clone --recursive https://github.com/GATB/DiscoSnp.git
+    
+    # compile the code and run a simple test on your computer
+    cd DiscoSnp
+    sh INSTALL
+
+# Getting a binary stable release
+
+Binary releases for Linux and Mac OSX are provided within the "Releases" tab on the Github/DiscoSnp web page.
+
+After downloading and extracting the content of the binary archive, please run the following command from DiscoSnp home directory:
+
+    chmod +x run_discoSnp++.sh test/*.sh scripts/*.sh
+
+# Quick start
+
+Run DiscoSnp WITHOUT mapping results on a reference genome:
+
+    ./run_discoSnp++.sh -r test/fof.txt -T
+
+Run DiscoSnp WITH mapping results on a reference genome (requires bwa):
+
+    ./run_discoSnp++.sh -r test/fof.txt -T  -G test/reference_genome.fa
+
+Note: if bwa is not in your PATH, then add the option "-B path_to_bwa". For instance:
+
+    ./run_discoSnp++.sh -r test/fof.txt -T  -G test/reference_genome.fa -B /home/me/my_programs/bwa-0.7.12/
+
+Run DiscoSnp WITH mapping results on a reference genome AND using this reference genome for calling variants:
+
+    ./run_discoSnp++.sh -r test/fof.txt -T  -G test/reference_genome.fa -R
+
+# User manual
+
+See doc/discoSnp_user_guide.pdf or doc/discoSnp_user_guide.txt
+
+# DiscoSnpRad
+While dealing with RAD-Seq data,  `run_discoSnpRad.sh` script should be used. It uses options specific to RAD-Seq: branching strategy, kind of extensions, abundance threshold, and kind of bubbles to be found. Moreover, it clusters variants per locus by calling the `discoRAD_finalization.sh` pipeline. Cluster information is  reported in the final provided VCF file. 
+
+A README file describes all scripts and the `discoRAD_finalization.sh` pipeline.
+
+# Contact
+
+Remarks and questions: [https://www.biostars.org/t/discosnp/](https://www.biostars.org/t/discosnp/)
+
+Contact: Pierre Peterlongo: [pierre.peterlongo@inria.fr](mailto:pierre.peterlongo@inria.fr)
--- a/README.pdf
+++ b/README.pdf
--- a/README.txt
+++ b/README.txt
-discoSnp
-Reference-free detection of isolated SNPs
-v1.2.3
-User's guide – July 2014
-
-contact: pierre.peterlongo@inria.fr
-Table of contents
-CeCILL License	1
-Publication	1
-discoSnp features at a glance	2
-Quick starting	2
-Components	2
-Download and install	2
-Running discoSnp	3
-Output	3
-Extensions: differences between unitig and contigs (from version 2.1.1.3)	5
-Output Analyze	5
-
-CeCILL License
-Copyright INRIA
-
-This software is a computer program whose purpose is to find all the similar reads between two set of NGS reads. It also provide a similarity score between the two samples.
-
-This software is governed by the CeCILL license under French law and abiding by the rules of distribution of free software.  You can  use, modify and/ or redistribute the software under the terms of the CeCILL license as circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info".
-
-As a counterpart to the access to the source code and  rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty  and the software's author,  the holder of the economic rights,  and the successive licensors  have only  limited liability.
-
-In this respect, the user's attention is drawn to the risks associated with loading,  using,  modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean  that it is complicated to manipulate,  and  that  also therefore means  that it is reserved for developers  and  experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their systems and/or data to be ensured and,  more generally, to use and operate it in the same conditions as regards security.
-
-The fact that you are presently reading this means that you have had knowledge of the CeCILL license and that you accept its terms.
-
-Publication
-Publication is in preparation
-discoSnp features at a glance
-Software discoSnp is designed for extracting isolated Single Nucleotide Polymorphism (SNP) from raw set(s) of reads obtained with Next Generation Sequencers (NGS). Isolated means far away from any source of polymorphism, and far away means at least k bp, with k a user defined parameter.
-Note that this tool is specially designed to use only a limited amount of memory (3 billions reads of size 100 can be treated with less that 6GB memory).
-The software is composed of two independent modules. The first module, kissnp2, detects isolated SNPs from read sets. The second module, kissreads, enhances the kissnp2 results by computing per read set  and for each found SNP i/ its mean read coverage and ii/ the average (phred) quality of reads generating the polymorphism. 
-Quick starting
-After compiling programs (./compile_kissnp_kissreads.sh) the main script can be called as follows:
-   ./run_discoSnp.sh -r "data_sample/reads_sequence1.fasta data_sample/reads_sequence2.fasta.gz"
-
-This creates a fasta file called discoRes_k_31_c_4_coherent.fa containing the found SNPs.
-Components
-Licence and readme
-clear_kissnp_kissreads.sh : cleaning the compiled tools. Useful when changing the k value
-compile_kissnp_kissreads.sh : compile all binaries and put them in a ''tool'' directory
-data_sample : directory containing two toy example read sets
-kissnp2 : sources of the SNP detection module
-kissreads : sources of the read coverage and quality module
-minia : sources of the k-mer couting and datastructure
-output_analyses : directory containing third party tools for post processing discoSnp results.
-run_discoSnp.sh : main (hopefully user friendly) script running the both modules for searching SNPs from raw read sets and for counting their read coverage and quality 
-Download and install
-Download from discoSnp web page – http://colibread.inria.fr/discosnp/ . Please read and accept the CeCILL license before downloading.
-Unzip the downloaded package :
- 	# unzip discoSnp_versionnumber.zip
-Get into the newly created discoSnp directory:
-	# cd discoSnp
-Compile the two modules (kissnp and kissreads) with the single command:
- 	# ./compile_kissnp_kissreads.sh k_value
-Note about the k value: 
-	Replace k_value by the value of the kmer that you wish to use. 
-	No need to recompile for any values below or equal to 31
-	No need to recompile for any values in [32,63]
-	No need to recompile for any values in [64,127]
-	...
-	By default, k=31 (value used both for compiling and in runtime).
-	
-If you wish to recompile (for instance for changing the k value from any value below or equal to 31 to any value bigger or equal to 32) first clear the previously compiled executables:
-	# ./clear_kissnp_kissreads.sh 
-
-This installation process should generate the “tool” directory containing the two executables : kissnp2 and  kissreads. 
-
-Running discoSnp
-The main script run_discoSnp.sh automatically run the two modules (SNP detection and read coverage and quality computations). You will provide the following information:
-r (read_sets) “readref.fasta   readsnp.fastq.gz”: localization of the read files. Note that these files may be in fastq, or fasta, gzipped or not. If there are more than one read file, then they must be surrounded by the " character and they are separated by space. 
-This is the only mandatory parameter.
-p prefix_name: all temp and final files will be written will start with this prefix
-default: “discoRes”
-k kmer_size:  size of kmers (default: 31)
-b branching_strategy:  branching filtering approach. This parameters influances the precision recall. 
-0: SNPs for wich any of the two paths is branching are discarted (high precision, lowers the recal in complex genomes). Default value
-1: (smart branching) forbid SNPs for wich the two paths are branching (e.g. the two paths can be created either with a 'A' or a 'C' at the same position 
-2: No limitation on branching (lowers the precision, high recall)"
-c minimal_coverage: minimal kmer coverage 
-default 4
-d error_threshold: max number of errors per read (used by kissreads only). 
-default 1
-g genome_size: estimated genome size. Used only to control kissnp2 memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM.
-Default 10000000 (uses a few MB of memory)
-w: If this option is called the waste files are not removed.
-h: show help.
-By default binaries are search in the current “tool” directory. You may change this default value by editing the run_discoSnp.sh script changing the PATH_RS line
-Additionally you may change some kissnp2 / kissreads options. In this case you may change the two corresponding lines in the run_discoSnp.sh file. To know the possible options, type ./tools/kissnp2 and/or ./tools/kissreads without options. Note that usually, changing these options is not necessary. 
-
-Sample example:
-You can test discoSnp on a toy example containing 3 SNPs:
-./run_discoSnp.sh -r "data_sample/reads_sequence1.fasta data_sample/reads_sequence2.fasta.gz"
-Output
-Final results are in prefix_coherent_k_kval_c_cval.fa file. This is a simple fasta file composed of a succession of pairs of sequences. Each pair corresponds to a SNP. Let's look at an example :
->SNP_higher_path_2|high|left_unitig_length_472|right_unitig_length_261|left_contig_length_472|right_contig_length_378|C1_8|C2_120|rank_0.88900
-ttgcggataccgttgagacatcttataagtagacgcaatgcggaatcttatagaatcgcccgatagcgttgtgttggtggacacggctgattaccctctcacccgcgctattagcttccataccacctgcggccatccattaagatccgctgctcctcacgaaaaaagaattaataagaagtcccgtaacatgcggatttggtagtcgttatagacaactttactggggcgaactaaaacgcttgtggacagaattttggcagtggcaattaatctctaatgatgtgatattagggtctaaaatgtaagaattcggtgagttagattggacaaggggatccgaagatgttttggcgcagttagtcacagggggagcccctgcctacaaaaagcgcttactgttgactgtctagggatacagcgaaagcggcagtcgttgaagcaaaagtgatatgtgcgacactgcatctagGCAGCGCAACAACGCAACAGCTCGAGGTGTACTTCGCAGAGAAACCGCACGTCCAGTTCTAacactctcatatgtgctcgtcgtttatgctttcggcgtgaaaactggtgcgccggtgtctggagaccatccttcttgcgtatgactccaaggacagccatcacggtttgtgggttcactgggactgtcacgcttaaccggacggaactcgagaaggcatacgactggtcgtaagaccgctctgatccgacaccaccataacgcggcactcatgattatcatcacttttttagtccctattacagagctgccgggtggatgactctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtactaatagggccgggctacatgttaactac
->SNP_lower_path_2|high|left_unitig_length_472|right_unitig_length_261|left_contig_length_472|right_contig_length_378|C1_118|C2_6|rank_0.88900
-ttgcggataccgttgagacatcttataagtagacgcaatgcggaatcttatagaatcgcccgatagcgttgtgttggtggacacggctgattaccctctcacccgcgctattagcttccataccacctgcggccatccattaagatccgctgctcctcacgaaaaaagaattaataagaagtcccgtaacatgcggatttggtagtcgttatagacaactttactggggcgaactaaaacgcttgtggacagaattttggcagtggcaattaatctctaatgatgtgatattagggtctaaaatgtaagaattcggtgagttagattggacaaggggatccgaagatgttttggcgcagttagtcacagggggagcccctgcctacaaaaagcgcttactgttgactgtctagggatacagcgaaagcggcagtcgttgaagcaaaagtgatatgtgcgacactgcatctagGCAGCGCAACAACGCAACAGCTCGAGGTGTTCTTCGCAGAGAAACCGCACGTCCAGTTCTAacactctcatatgtgctcgtcgtttatgctttcggcgtgaaaactggtgcgccggtgtctggagaccatccttcttgcgtatgactccaaggacagccatcacggtttgtgggttcactgggactgtcacgcttaaccggacggaactcgagaaggcatacgactggtcgtaagaccgctctgatccgacaccaccataacgcggcactcatgattatcatcacttttttagtccctattacagagctgccgggtggatgactctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtactaatagggccgggctacatgttaactac
-In this example a SNP A/T is found (underlined here). The central sequence of length 2k-1 (here 2*31-1=61) is seen in upper case, while the two (left and right) extensions are seen in lower case. 
-The comments are formatted as follow :
-	>SNP_higher/lower_path_id|high/low|left_unitig_length_int|right_unitigtig_length_int|left_contig_length_int|right_contig_length_int|C1_int|C2_int|[Q1_int|Q2_int|]rank_float
-higher/lower: one of the two alleles 
-id: id of the SNP: each SNP (couple of sequences) has a unique id, here 3.
-high/low: sequence complexity. If the sequece if of low complexity (e.g. ATATATATATATATAT) this variable would be low
-left_unitig_length: size of the full left extension. Here 472
-right_unitig_length: size of the right extension. Here 261
-left_contig_length: size of the full left extension. Here 472
-right_contig_length: size of the right extension. Here 378
-C1: number of reads mapping the central upper case sequence from the first read set
-C2: number of reads mapping the central upper case sequence from the second read set
-C3 [if input data were at least 3 read sets]:  number of reads mapping the central upper case sequence from the third read set
-C4, C5, ...
-Q1 [if reads were given in fastq]: average phred quality of the central nucleotide (here A or T) from the mapped reads from the first read set.
-Q2 [if reads were given in fastq]: average phred quality of the central nucleotide (here A or T) from the mapped reads from the second read set.
-Q3 [if the data were at least 3 fastq read sets]: average phred quality of the central nucleotide (here A or T) from the mapped reads from the third read set.
-Q4, Q5, …
-rank: ranks the predictions according to their read coverage in each condition favoring SNPs that are discriminant between conditions (Phi coefficient) (see publication)
-Extensions: differences between unitig and contigs (from version 2.1.1.3)
-By default in the pipeline, the found SNPs (of length 2k-1) are extended using a contiger. The output contains such contigs and their lengths are shown in the header (left_contig_length and right_contig_length). Moreover, a contig may hide some small polymorphism such as substitutions and/or indels. The output also proposes the length of the longuest extension not containing any such polymorphism. These extensions are called unitigs (defined as « A uniquely assembleable subset of overlapping fragments »).
-Output Analyze
-From a fasta format to a csv format: If you wish to analyze the results in a tabulated format: 
-# python output_analyses/discoSnp_to_csv.py discoSnp_output.fa 
-will output a .csv tabulated file containing on each line the content of 4 lines of the .fa, replacing the '|' character by spaces and removing the CX_
-example with previously used SNP example:
->SNP_higher_path_3 high left_contig_length_86 right_contig_length_52 78 5 rank_0.89839 tctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtACTAATAGGGCCGGGCTACATGTTAACTACAAGGCTATAACCTATTGATGACCCGGTCCATacataacttggtatcgtgcatgtagcgttcaagggctatagcaattccgacg >SNP_lower_path_3 high left_contig_length_86 right_contig_length_52 4 91 rank_0.89839 tctctaccgcgctctgtggaagtgcacttgatcgttttgctgtagaaaaaacttaataaacagaatgccgatgaaggcactactgtACTAATAGGGCCGGGCTACATGTTAACTACTAGGCTATAACCTATTGATGACCCGGTCCATacataacttggtatcgtgcatgtagcgttcaagggctatagcaattccgacg
-Genotyping the results: If you wish to genotype your results:
-#python output_analyses/discoSnp_to_genotypes.py discoSnp_output.fa threshold_value
-will output a file containing on each line the “genotypes” of a SNP.  For each input data set it indicates if the SNP is:
-heterozygous ALT1 path (coverage ALT1 >= threshold and ALT2 < threshold): 1
-heterozygous ALT2 path (coverage ALT1 < threshold and ALT2 >= threshold): -1
-homozygous (coverage ALT1 >= threshold and ALT2 >= threshold): 2
-absent (coverage ALT1 < threshold and ALT2 < threshold): 0
-then it outputs the central sequence of length 2k-1 replacing the central position by ALT1/ALT2
-example with previously used SNP example and threshold 20:
-GENOTYPES_SNP_3_THRESHOLD_20 1 -1 TACTAATAGGGCCGGGCTACATGTTAACTACA/TAGGCTATAACCTATTGATGACCCGGTCCATA
--- a/clear_kissnp_kissreads.sh
+++ b/clear_kissnp_kissreads.sh
-#Copyright inria / irisa (2013)
-#
-#
-#raluca.uricaru@gmail.com
-#pierre.peterlongo@inria.fr
-#
-#This software is a computer program whose purpose is to call SNPs from NGS reads.
-#
-#This software is governed by the CeCILL license under French law and
-#abiding by the rules of distribution of free software.  You can  use,
-#modify and/ or redistribute the software under the terms of the CeCILL
-#license as circulated by CEA, CNRS and INRIA at the following URL
-#"http:#www.cecill.info".
-#
-#As a counterpart to the access to the source code and  rights to copy,
-#modify and redistribute granted by the license, users are provided only
-#with a limited warranty  and the software's author,  the holder of the
-#economic rights,  and the successive licensors  have only  limited
-#liability.
-#
-#In this respect, the user's attention is drawn to the risks associated
-#with loading,  using,  modifying and/or developing or reproducing the
-#software by the user in light of its specific status of free software,
-#that may mean  that it is complicated to manipulate,  and  that  also
-#therefore means  that it is reserved for developers  and  experienced
-#professionals having in-depth computer knowledge. Users are therefore
-#encouraged to load and test the software's suitability as regards their
-#requirements in conditions enabling the security of their systems and/or
-#data to be ensured and,  more generally, to use and operate it in the
-#same conditions as regards security.
-#
-#The fact that you are presently reading this means that you have had
-#knowledge of the CeCILL license and that you accept its terms.
-
-
-cd kissnp2/
-make clean
-cd ../kissreads/
-make clean
-cd ..
-
-rm -f ./tools/kissreads
-rm -f ./tools/kissnp2
--- a/compile_kissnp_kissreads.sh
+++ b/compile_kissnp_kissreads.sh
-#Copyright inria / irisa (2013)
-#
-#
-#raluca.uricaru@gmail.com
-#pierre.peterlongo@inria.fr
-#
-#This software is a computer program whose purpose is to call SNPs from NGS reads.
-#
-#This software is governed by the CeCILL license under French law and
-#abiding by the rules of distribution of free software.  You can  use,
-#modify and/ or redistribute the software under the terms of the CeCILL
-#license as circulated by CEA, CNRS and INRIA at the following URL
-#"http:#www.cecill.info".
-#
-#As a counterpart to the access to the source code and  rights to copy,
-#modify and redistribute granted by the license, users are provided only
-#with a limited warranty  and the software's author,  the holder of the
-#economic rights,  and the successive licensors  have only  limited
-#liability.
-#
-#In this respect, the user's attention is drawn to the risks associated
-#with loading,  using,  modifying and/or developing or reproducing the
-#software by the user in light of its specific status of free software,
-#that may mean  that it is complicated to manipulate,  and  that  also
-#therefore means  that it is reserved for developers  and  experienced
-#professionals having in-depth computer knowledge. Users are therefore
-#encouraged to load and test the software's suitability as regards their
-#requirements in conditions enabling the security of their systems and/or
-#data to be ensured and,  more generally, to use and operate it in the
-#same conditions as regards security.
-#
-#The fact that you are presently reading this means that you have had
-#knowledge of the CeCILL license and that you accept its terms.
-
-
-k=29
-if test $# -eq 0
-then
-	k=29
-else
-    k=$1
-fi
-
-
-mkdir tools
-
-
-cd kissnp2
-make k=$k
-cd ../
-cp kissnp2/kissnp2 tools
-if [ $? -ne 0 ]
-then
-echo "there was a problem compiling kissnp2, sorry. Please fix the problem or contact pierre.peterlongo@inria.fr."
-exit
-fi
-
-cd kissreads
-make MYFLAGS=-DOMP k=$k # VERSION KISSREADS PARALLEL
-#make k=$k # VERSION KISSREADS SEQUENCIAL
-cd ..
-cp kissreads/kissreads tools
-if [ $? -ne 0 ]
-then
-echo "there was a problem compiling kissreads, sorry. Please fix the problem or contact pierre.peterlongo@inria.fr."
-exit
-fi
-
-
-echo "Compiling is done, if possible, :"
-echo -e "\t 1/ copy executables \"tools/kissnp2\" and \"tools/kissreads\" in a directory member of the PATH environment variable (e.g. /usb/local/bin)"
-echo -e "\t 2/ replace PATH_RS=\"./tools\" by PATH_RS=\"\" in the \"run_discoSnp.sh\" configuration file"
-MY_PATH="`( cd \"$MY_PATH\" && pwd )`"  # absolutized and normalized
-if [ -z "$MY_PATH" ] ; then
-# error; for some reason, the path is not accessible
-# to the script (e.g. permissions re-evaled after suid)
-exit 1  # fail
-fi
-echo -e "   or"
-echo "Leave it as is. In this case, if working outside this current directory (\"$MY_PATH\"), you will have to indicate in the \"run_discoSnp.sh\" where executables \"kissnp2\" and  \"kissreads\" are located by changing the value of the PATH_RS variable"
--- a/data_sample/reads_sequence1.fasta
+++ b/data_sample/reads_sequence1.fasta
--- a/debian/changelog
+++ b/debian/changelog
+discosnp (2.3.0-1) UNRELEASED; urgency=medium
+
+  * Point watch file to Github
+  * New upstream version
+  * debhelper 11
+  * Point Vcs fields to salsa.debian.org
+  * Standards-Version: 4.2.1
+
+ -- Andreas Tille <tille@debian.org>  Sat, 20 Oct 2018 10:56:58 +0200
+
 discosnp (1.2.6-2) unstable; urgency=medium

  * Secure URI in watch file

--- a/debian/compat
+++ b/debian/compat
-10
+11
--- a/debian/control
+++ b/debian/control
@@ -4,22 +4,19 @@ Uploaders: Olivier Sallou <osallou@debian.org>,
           Andreas Tille <tille@debian.org>
 Section: science
 Priority: optional
-Build-Depends: debhelper (>= 10),
-               bc,
-               zlib1g-dev,
-               help2man
-Standards-Version: 4.1.1
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/discosnp.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/discosnp.git
+Build-Depends: debhelper (>= 11~),
+               cmake,
+               libgatbcore-dev
+Standards-Version: 4.2.1
+Vcs-Browser: https://salsa.debian.org/med-team/discosnp
+Vcs-Git: https://salsa.debian.org/med-team/discosnp.git
 Homepage: http://colibread.inria.fr/discosnp/

 Package: discosnp
 Architecture: any
 Depends: ${misc:Depends},
         ${shlibs:Depends},
-         zlib1g,
-         bc,
-         python
+         ${python3:Depends}
 Description: discovering Single Nucleotide Polymorphism from raw set(s) of reads
 Software discoSnp is designed for discovering Single Nucleotide 
 Polymorphism (SNP) from raw set(s) of reads obtained with Next Generation

--- a/debian/patches/fix_build
+++ b/debian/patches/fix_build
-Subject: Add hardening
-Description: makefiles do not import default CFLAGS,...
-this patch add debian cflags for hardening
-Forwarded: no
-Author: Olivier Sallou <osallou@debian.org>
-Last-Updated: 2014-02-01
--- a/kissnp2/makefile
-+++ b/kissnp2/makefile
-@@ -33,7 +33,10 @@
- #knowledge of the CeCILL license and that you accept its terms.
- 
- CC=g++
-CFLAGS=  -O4 -lz -DMINIA_IS_IN_PARENT_FOLDER
-+CPPFLAGS:=$(shell dpkg-buildflags --get CPPFLAGS)
-+CFLAGS = $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -O4 -lz -DMINIA_IS_IN_PARENT_FOLDER
-+LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS)
-+
- #CFLAGS=  -O4 -lz -DMINIA_IS_IN_PARENT_FOLDER -DDONTMARK
- SRC=../minia/Pool.cpp ../minia/Bank.cpp ../minia/Bloom.cpp ../minia/Hash16.cpp ../minia/Terminator.cpp ../minia/Kmer.cpp ../minia/Traversal.cpp ../minia/LinearCounter.cpp ../minia/Set.cpp ../minia/Utils.cpp ../minia/SortingCount.cpp ../minia/Debloom.cpp ../minia/OAHash.cpp Kmer_for_kissnp2.cpp SNP.cpp filter.cpp IterativeExtensions.cpp commons.cpp
- EXEC=kissnp2
-@@ -41,7 +44,7 @@
- all: $(EXEC)
- 
- ifeq ($(prof),1)
- CFLAGS=-O3 -pg -lz
-+ CFLAGS= $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -O3 -pg -lz
- endif
- ifeq ($(deb),1)
-  CFLAGS+=-O0 -DASSERTS -g -lz 
-@@ -79,7 +82,7 @@
- all: $(EXEC)
- 
- kissnp2:  $(OBJ) kissnp2.cpp
-	$(CC) -o $@ $^ $(CFLAGS) 
-+	$(CC) -o $@ $^ $(CFLAGS) $(LDFLAGS)
- 
- %.o: %.cpp %.h
- 	$(CC) -o $@ -c $< $(CFLAGS)
--- a/minia/makefile
-+++ b/minia/makefile
-@@ -1,4 +1,7 @@
-CFLAGS+= -O4 -D_FILE_OFFSET_BITS=64 # needed to handle files > 2 GB on 32 bits systems
-+#CFLAGS+= -O4 -D_FILE_OFFSET_BITS=64 # needed to handle files > 2 GB on 32 bits systems
-+CPPFLAGS:=$(shell dpkg-buildflags --get CPPFLAGS)
-+CFLAGS = $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -O4 -D_FILE_OFFSET_BITS=64
-+LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS)
- SRC=Pool.cpp Bank.cpp Bloom.cpp Hash16.cpp LargeInt.cpp Kmer.cpp Terminator.cpp Traversal.cpp LinearCounter.cpp Set.cpp Utils.cpp SortingCount.cpp Debloom.cpp OAHash.cpp
- EXEC=minia
- OBJ= $(SRC:.cpp=.o)
-@@ -60,7 +63,7 @@
- 	$(MAKE) $(EXEC)
- 
- minia: $(OBJ) Minia.cpp
-	$(CXX) -o $@ $(OBJ) Minia.cpp $(CFLAGS) -lz
-+	$(CXX) -o $@ $(OBJ) Minia.cpp $(CFLAGS) $(LDFLAGS) -lz
- 
- %.o: %.cpp %.h
- 	$(CXX) -o $@ -c $< $(CFLAGS)
--- a/kissreads/Makefile
-+++ b/kissreads/Makefile
-@@ -20,10 +20,10 @@
- # optimized flags
- 
- #CFLAGS=-Wall -O3 -lz -fopenmp -DOMP #openmp is incompatiblre with clang #@!§
-CFLAGS= -O3 -lz -fopenmp -DOMP #openmp is incompatiblre with clang #@!
-+CFLAGS = $(shell dpkg-buildflags --get CFLAGS) $(CPPFLAGS) -Wall -O3 -lz
- CFLAGS+=$(MYFLAGS)
- #LDFLAGS+=-Wall -lm -O3 -lz -DOMP -fopenmp#-fopenmp
-LDFLAGS+= -lm -O3 -lz -DOMP -fopenmp#-fopenmp
-+LDFLAGS:=$(shell dpkg-buildflags --get LDFLAGS) -Wall -lm -O3 -lz
- 
- ## debug flags
- 
-@@ -51,7 +51,7 @@
- 	@echo "#################"
- 
- kissreads: $(OBJ) 
-	$(CC)  -o $@ $^ $(LDFLAGS)
-+	$(CC)  -o $@ $^ $(CFLAGS) $(LDFLAGS)
- 
- 
- kissReads.o: $(INCLDIR)coherence_algorithm.h  $(INCLDIR)couple.h $(INCLDIR)extension_algorithm.h $(INCLDIR)fragment_info.h  $(INCLDIR)libchash.h  $(INCLDIR)outputs.h $(INCLDIR)commons.h $(INCLDIR)extending_fragment.h $(INCLDIR)fragment_index.h $(INCLDIR)hash.h $(INCLDIR)list.h 
--- a/debian/patches/series
+++ b/debian/patches/series
-fix_build
--- a/debian/rules
+++ b/debian/rules
@@ -7,27 +7,4 @@ include /usr/share/dpkg/default.mk

 %:
 	dh $@
-
-override_dh_auto_build:
-	dh_auto_build
-	cd kissnp2 && make k=29
-	cd kissreads && make k=29
-
-override_dh_install:
-	cp output_analyses/discoSnp_to_genotypes.py output_analyses/discoSnp_to_genotypes
-	cp output_analyses/discoSnp_to_csv.py output_analyses/discoSnp_to_csv
-	help2man --help-option=-h --no-discard-stderr --version-string=$(DEB_VERSION_UPSTREAM) kissnp2/kissnp2  > kissnp2.1
-	help2man --help-option=-h --no-discard-stderr --version-string=$(DEB_VERSION_UPSTREAM) kissreads/kissreads > kissreads.1
-	dh_install
-
-override_dh_clean:
-	cd minia && make clean
-	cd kissnp2 && make clean
-	cd kissreads && make clean
-	rm -f output_analyses/discoSnp_to_genotypes
-	rm -f output_analyses/discoSnp_to_csv
-	rm -f kissnp2/kissnp2
-	rm -f kissreads/kissreads
-	rm -f kissreads.1
-	rm -f kissnp2.1
 	dh_clean
--- a/debian/watch
+++ b/debian/watch
 version=4

-https://colibread.inria.fr/discosnp/ https://colibread.inria.fr/files/\d+/\d+/discoSnp_([.\d]+)\.zip
+https://github.com/GATB/DiscoSnp/releases .*/archive/v?@ANY_VERSION@@ARCHIVE_EXT@
--- a/doc/discoSnp_user_guide.odt
+++ b/doc/discoSnp_user_guide.odt