Skip to content
Commits on Source (11)
......@@ -10,7 +10,7 @@ cmake_minimum_required(VERSION 2.6)
################################################################################
SET (gatb-tool_VERSION_MAJOR 2)
SET (gatb-tool_VERSION_MINOR 4)
SET (gatb-tool_VERSION_PATCH 2)
SET (gatb-tool_VERSION_PATCH 3)
IF (DEFINED MAJOR)
SET (gatb-tool_VERSION_MAJOR ${MAJOR})
......@@ -84,6 +84,7 @@ SET (CPACK_SOURCE_IGNORE_FILES
)
# For creating the BINARY package we include the files we want
INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/discoSnpRAD DESTINATION .)
INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc DESTINATION .)
INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test DESTINATION .)
INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts DESTINATION . FILES_MATCHING REGEX ".*\\.(py|sh)$" PATTERN "jenkins" EXCLUDE)
......
discosnp (2.4.2-1) UNRELEASED; urgency=medium
discosnp (2.4.3-1) UNRELEASED; urgency=medium
* Team upload.
[ Steffen Moeller ]
* New upstream version
* Reported fixed typos upstream
- https://github.com/GATB/DiscoSnp/pull/15/
- https://github.com/GATB/DiscoSnp/pull/14/
* FTBFS:
-- UNABLE TO FIND A DIRECTORY FOR gatb-core...
Call Stack (most recent call first):
/usr/lib/x86_64-linux-gnu/cmake/GatbCore.cmake:36 (LOOKUP_PATH)
CMakeLists.txt:47 (include)
-- Configuring incomplete, errors occurred!
-- Steffen Moeller <moeller@debian.org> Tue, 17 Dec 2019 14:33:46 +0100
[ Andreas Tille ]
* Fix Python interpreter in several scripts and autopkgtest
Closes: #942982
* Standards-Version: 4.5.0 (routine-update)
* Remove trailing whitespace in debian/changelog (routine-update)
* Set upstream metadata fields: Bug-Database, Bug-Submit, Repository.
-- Andreas Tille <tille@debian.org> Mon, 27 Jan 2020 14:14:13 +0100
discosnp (2.3.0-3) unstable; urgency=medium
......
......@@ -12,7 +12,7 @@ Build-Depends: debhelper-compat (= 12),
libboost-dev,
libhdf5-dev,
zlib1g-dev
Standards-Version: 4.4.1
Standards-Version: 4.5.0
Vcs-Browser: https://salsa.debian.org/med-team/discosnp
Vcs-Git: https://salsa.debian.org/med-team/discosnp.git
Homepage: http://colibread.inria.fr/discosnp/
......
scripts usr/share/discosnp
scripts_RAD usr/share/discosnp
run_discoSnp*.sh usr/bin
discoSnpRAD usr/bin
......@@ -582,7 +582,7 @@ Description: Result of 2to3
--- a/discoSnpRAD/run_discoSnpRad.sh
+++ b/discoSnpRAD/run_discoSnpRad.sh
@@ -75,7 +75,7 @@ verbose=1
@@ -76,7 +76,7 @@ clustering="false"
short_read_connector_path=""
option_phase_variants=""
#EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
......@@ -591,7 +591,7 @@ Description: Result of 2to3
if [ -d "$EDIR/../build/" ] ; then # VERSION SOURCE COMPILED
read_file_names_bin=$EDIR/../build/bin/read_file_names
@@ -556,7 +556,7 @@ fi
@@ -570,7 +570,7 @@ fi
echo -e "${yellow}\t############################################################"
echo -e "\t#################### REDUNDANCY REMOVAL ###################"
echo -e "\t############################################################$reset"
......
......@@ -2,10 +2,8 @@ Author: Andreas Tille <tille@debian.org>
Last-Update: Mon, 21 Jan 2019 09:01:19 +0100
Description: Adapt test scripts to Debian PATH
Index: discosnp/test/simple_test.sh
===================================================================
--- discosnp.orig/test/simple_test.sh
+++ discosnp/test/simple_test.sh
--- a/test/simple_test.sh
+++ b/test/simple_test.sh
@@ -1,6 +1,6 @@
#!/bin/bash
......@@ -14,10 +12,8 @@ Index: discosnp/test/simple_test.sh
diff discoRes_k_31_c_3_D_100_P_3_b_0_coherent.fa ref_discoRes_k_31_c_3_D_100_P_3_b_0_coherent.fa
if [ $? -ne 0 ] ; then
Index: discosnp/test/large_test/local_large_test.sh
===================================================================
--- discosnp.orig/test/large_test/local_large_test.sh
+++ discosnp/test/large_test/local_large_test.sh
--- a/test/large_test/local_large_test.sh
+++ b/test/large_test/local_large_test.sh
@@ -2,7 +2,7 @@
#####################
# Default option run:
......
......@@ -4,16 +4,18 @@ Description: Failed to fix these install dirs, deactivate completely and use dh_
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,12 +81,12 @@ SET (CPACK_SOURCE_IGNORE_FILES
@@ -84,13 +84,13 @@ SET (CPACK_SOURCE_IGNORE_FILES
)
# For creating the BINARY package we include the files we want
-INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/discoSnpRAD DESTINATION .)
-INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc DESTINATION .)
-INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test DESTINATION .)
-INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts DESTINATION . FILES_MATCHING REGEX ".*\\.(py|sh)$" PATTERN "jenkins" EXCLUDE)
-INSTALL (FILES ${CMAKE_CURRENT_SOURCE_DIR}/run_discoSnp++.sh DESTINATION .)
-INSTALL (FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.md DESTINATION .)
-INSTALL (FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE DESTINATION .)
+#INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/discoSnpRAD DESTINATION .)
+#INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc DESTINATION .)
+#INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test DESTINATION .)
+#INSTALL (DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts DESTINATION . FILES_MATCHING REGEX ".*\\.(py|sh)$" PATTERN "jenkins" EXCLUDE)
......
......@@ -6,10 +6,8 @@ Description: There is no point in delivering read_file_names in /usr/bin
.
Some other issues of these scripts are fixed as well.
Index: discosnp/run_discoSnp++.sh
===================================================================
--- discosnp.orig/run_discoSnp++.sh
+++ discosnp/run_discoSnp++.sh
--- a/run_discoSnp++.sh
+++ b/run_discoSnp++.sh
@@ -65,7 +65,8 @@ verbose=1
stop_after_kissnp=0
e=""
......
......@@ -2,10 +2,8 @@ Author: Andreas Tille <tille@debian.org>
Last-Update: Mon, 21 Jan 2019 09:01:19 +0100
Description: Fix spelling
Index: discosnp/tools/kissnp2/src/Kissnp2.cpp
===================================================================
--- discosnp.orig/tools/kissnp2/src/Kissnp2.cpp
+++ discosnp/tools/kissnp2/src/Kissnp2.cpp
--- a/tools/kissnp2/src/Kissnp2.cpp
+++ b/tools/kissnp2/src/Kissnp2.cpp
@@ -53,8 +53,8 @@ Kissnp2::Kissnp2 () : Tool ("Kissnp2")
getParser()->push_front (new OptionNoParam (STR_DISCOSNP_LOW_COMPLEXITY, "conserve low complexity SNPs", false));
getParser()->push_front (new OptionOneParam (STR_MAX_AMBIGOUS_INDELS, "Maximal size of ambiguity of INDELs. INDELS whose ambiguity is higher than this value are not output", false, "20"));
......@@ -35,10 +33,8 @@ Index: discosnp/tools/kissnp2/src/Kissnp2.cpp
}
Index: discosnp/tools/kissreads2/src/Kissreads2.cpp
===================================================================
--- discosnp.orig/tools/kissreads2/src/Kissreads2.cpp
+++ discosnp/tools/kissreads2/src/Kissreads2.cpp
--- a/tools/kissreads2/src/Kissreads2.cpp
+++ b/tools/kissreads2/src/Kissreads2.cpp
@@ -41,7 +41,7 @@ Kissreads2::Kissreads2 () : Tool ("Kissr
/** We add options known by kissnp2. */
......
......@@ -4,7 +4,7 @@ Description: Use cmake input file of Debian packaged gatb-core
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,8 +29,8 @@ ENDIF()
@@ -32,8 +32,8 @@ ENDIF()
################################################################################
# Define cmake modules directory
################################################################################
......@@ -91,6 +91,28 @@ Description: Use cmake input file of Debian packaged gatb-core
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIRZ})
# we define which libraries to be linked with project binary
-target_link_libraries (${PROJECT_NAME} ${gatb-core-libraries})
+target_link_libraries (${PROJECT_NAME} gatbcore hdf5 z )
# We copy the project binary to the 'bin' directory
-INSTALL (TARGETS ${PROJECT_NAME} DESTINATION bin)
\ No newline at end of file
+INSTALL (TARGETS ${PROJECT_NAME} DESTINATION bin)
--- a/tools/create_coverage_h5_file/CMakeLists.txt
+++ b/tools/create_coverage_h5_file/CMakeLists.txt
@@ -16,14 +16,16 @@ include_directories (src ${gatb-core-in
file (GLOB_RECURSE ProjectFiles src/*)
# we define the artifact to be built: the project binary
+include(GNUInstallDirs)
+LINK_DIRECTORIES( /usr/${CMAKE_INSTALL_LIBDIR}/hdf5/serial )
add_executable (${PROJECT_NAME} ${ProjectFiles})
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIRZ})
# we define which libraries to be linked with project binary
-target_link_libraries (${PROJECT_NAME} ${gatb-core-libraries})
+target_link_libraries (${PROJECT_NAME} gatbcore hdf5 z )
......
Tests: run-unit-test
Depends: @, python
Depends: @, python3
Restrictions: allow-stderr
Bug-Database: https://github.com/GATB/DiscoSnp/issues
Bug-Submit: https://github.com/GATB/DiscoSnp/issues/new
Registry:
- Name: bio.tools
Entry: discosnp
......@@ -7,5 +9,5 @@ Registry:
Entry: OMICS_00267
- Name: conda:bioconda
Entry: discosnp
Repository: https://github.com/GATB/DiscoSnp
Repository: https://github.com/GATB/DiscoSnp.git
Repository-Browse: https://github.com/GATB/DiscoSnp
......@@ -11,22 +11,25 @@ Gauthier, J., Mouden, C., Suchan, T., Alvarez, N., Arrigo, N., Riou, C., Lemait
## Installation
* discoSnp++
* discoSnp++ (see [../README.md](../README.md))
* `short_read_connector` must have been downloaded and installed (clustering task). [https://github.com/GATB/short_read_connector](https://github.com/GATB/short_read_connector)
## Usage
```
./run_discoSnpRad.sh --fof read_file_of_files --src_path <directory> [discoSnp++ OPTIONS]
./run_discoSnpRad.sh -r read_file_of_files -S -p myDiscoSnpRADresult [discoSnp++ OPTIONS]
```
Clustering
**Clustering option** (RAD-specific option):
```
-S|--src_path <directory>
**absolute** path to short_read_connector directory, containing the "short_read_connector.sh" file.
-S|--src [src_path]
performs clustering of variants with short_read_connector
src_path: **absolute** path to short_read_connector directory, containing the "short_read_connector.sh" file.
-Note1: short read connector must be compiled.
-Note2: with this option, discoSnpRad provide a vcf file containing SNPs and INDELS, clustered by locus
-Note2: if no value is given, it assumes short_read_connector.sh is in the PATH env variable.
-Note3: with this option, discoSnpRad outputs a vcf file containing the variants clustered by locus.
```
All other options are described in [discoSnp++ README](../README.md). Note that many discoSNP++ parameters have here default values, specifically adapted to RAD-seq data.
......@@ -39,8 +42,33 @@ To see all options:
## Output
* a log file reminds all filtering steps applied and the name of the output .vcf file
* a vcf file containing results of filtering and clustering
When run with output prefix name `myDiscoSnpRADresult`, the main output file is:
* `myDiscoSnpRADresult_[parameter_values]_clustered.vcf`: the final set of variants, with various information, including clustering per locus information (see VCF format below).
* or `myDiscoSnpRADresult_[parameter_values].vcf` if no clustering was performed.
Additionally, several other files are output that can be useful:
* `myDiscoSnpRADresult_[parameter_values]_raw.fa`: the raw set of variants in fasta format, prior to any filtering and clustering steps.
* `myDiscoSnpRADresult_[graph_parameter_values].h5`: the de Bruijn graph in h5 format (reusable with any GATB tool)
* `myDiscoSnpRADresult_read_files_correspondance.txt`: the correspondence between read file names and IDs given as genotypes in the vcf
* the standard output reports all filtering steps applied and the name of the output .vcf file
#### VCF format
Each variant is described with:
* an ID: `ID` column,
* two alleles (`REF` and `ALT` columns),
* a quality value: `INFO` column, `Rk`, between 0 (bad) and 1 (best),
* some clustering information: `INFO` field: with the locus id (`Cluster`) and its number of varying sites (`ClSize`),
* and for each sample in the genotype columns (`G1`, `G2`,...): the inferred genotype (`0/0`, `0/1`, `1/1` or `./.` for missing value), the read depths (`RD` total, `AD` per allele), among others.
## Content of this directory
......
......@@ -22,7 +22,7 @@ echo "this script manages bubble clustering from a discofile.fa file, and the in
echo " 1/ Remove variants with more than 95% missing genotypes and low rank (<0.4)"
echo " 2/ Cluster variants per locus"
echo " 3/ Format the variants in a vcf file with cluster information"
echo "Usage: ./discoRAD_clustering.sh -f discofile -s SRC_directory/ -o output_file.vcf"
echo "Usage: ./discoRAD_clustering.sh -f discofile -s SRC_path -o output_file.vcf"
# echo "nb: all options are MANDATORY\n"
echo "OPTIONS:"
echo "\t -f: DiscoSnp fasta output containing coherent predictions"
......@@ -44,7 +44,7 @@ while getopts "f:s:o:hw" opt; do
;;
s)
short_read_connector_directory=$OPTARG
short_read_connector_path=$OPTARG
;;
o)
......@@ -66,7 +66,7 @@ if [[ -z "${rawdiscofile}" ]]; then
echo "${red}-f is mandatory$reset" >&2
exit
fi
if [[ -z "${short_read_connector_directory}" ]]; then
if [[ -z "${short_read_connector_path}" ]]; then
echo "${red}-s is mandatory$reset" >&2
exit
fi
......@@ -80,7 +80,7 @@ EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
if [ -d "$EDIR/../../build/" ] ; then # VERSION SOURCE COMPILED
BINDIR=$EDIR"/../../build/bin"
else # VERSION BINARY
BINDIR=$EDIR"/../../bin"
BINDIR=$EDIR/../../bin
fi
rawdiscofile_base=$( basename "${rawdiscofile}" .fa)
......@@ -150,7 +150,7 @@ fi
#ls ${disco_simpler}.fa > ${disco_simpler}.fof
# Compute sequence similarities
cmdSRC="${short_read_connector_directory}/short_read_connector.sh -b ${disco_simpler}.fa -q ${disco_simpler}.fof -s 0 -k ${usedk} -a 1 -l -p ${disco_simpler} 1>&2 "
cmdSRC="${short_read_connector_path} -b ${disco_simpler}.fa -q ${disco_simpler}.fof -s 0 -k ${usedk} -a 1 -l -p ${disco_simpler} 1>&2 "
echo $green$cmdSRC$cyan
if [[ "$wraith" == "false" ]]; then
eval $cmdSRC
......
......@@ -72,6 +72,7 @@ output_coverage_option=""
genotyping="-genotype"
remove=1
verbose=1
clustering="false"
short_read_connector_path=""
option_phase_variants=""
#EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
......@@ -106,17 +107,19 @@ function help {
echo " ************"
echo "run_discoSnpRad.sh, pipelining kissnp2 and kissreads and clustering per locus for calling SNPs and small indels from RAD-seq data without the need of a reference genome"
echo "Version "$version
echo "Usage: ./run_discoSnpRad.sh --fof read_file_of_files --src_path <directory> [OPTIONS]"
echo "Usage: ./run_discoSnpRad.sh --fof read_file_of_files --src [src_path] [OPTIONS]"
echo -e "MANDATORY"
echo -e "\t -r|--fof <file name of a file of file(s)>"
echo -e "\t\t The input read files indicated in a file of file(s)"
echo -e "\t\t Example: -r bank.fof with bank.fof containing the two lines \n\t\t\t data_sample/reads_sequence1.fasta\n\t\t\t data_sample/reads_sequence2.fasta.gz"
echo -e "\nOPTIONS"
echo -e "\t -S|--src_path <directory>"
echo -e "\t\t **absolute** path to short_read_connector directory, containing the \"short_read_connector.sh\" file. "
echo -e "\t -S|--src [src_path]"
echo -e "\t\t performs clustering of variants with short_read_connector"
echo -e "\t\t src_path: **absolute** path to short_read_connector directory, containing the \"short_read_connector.sh\" file. "
echo -e "\t\t -Note1: short read connector must be compiled."
echo -e "\t\t -Note2: with this option, discoSnpRad provide a vcf file containing SNPs and INDELS, clustered by locus"
echo -e "\t\t -Note2: if no value is given, it assumes short_read_connector.sh is in the PATH env variable."
echo -e "\t\t -Note3: with this option, discoSnpRad outputs a vcf file containing the variants clustered by locus"
echo -e "\t -k | --k_size value <int value>"
echo -e "\t\t Set the length of used kmers. Must fit the compiled value."
......@@ -184,15 +187,13 @@ while :; do
-w)
wraith="true"
;;
-S|--src_path)
-S|--src)
clustering="true"
if [ "$2" ] && [ ${2:0:1} != "-" ] ; then # checks that there exists a second value and its is not the start of the next option
short_read_connector_path=$2
shift
else
die 'ERROR: "'$1'" option requires a non-empty option argument.'
fi
;;
-a|--ambiguity_max_size)
if [ "$2" ] && [ ${2:0:1} != "-" ] ; then # checks that there exists a second value and its is not the start of the next option
max_ambigous_indel=$2
......@@ -379,29 +380,42 @@ if [ -z "$read_sets" ]; then
exit 1
fi
src_file="$short_read_connector_path/short_read_connector.sh"
if [[ "$wraith" == "false" ]]; then
echo $yellow${src_file}$reset
fi
#Checks if clustering can be performed
if [[ "$wraith" == "false" ]]; then
if [[ "$clustering" == "true" ]]; then
# first tests the directory given by user if any
if [ -n "$short_read_connector_path" ]; then
src_file="$short_read_connector_path/short_read_connector.sh"
if [ -f "$src_file" ]; then
if [[ "$wraith" == "false" ]]; then
echo "${yellow}short_read_connector is $src_file$reset"
fi
echo "${yellow}short_read_connector path is $src_file$reset"
else
if [[ "$wraith" == "false" ]]; then
echo -e "${red}\t\t\t**************************************************************************"
echo -e "\t\t\t** WARNING: I cannot find short_read_connector (-S). "
echo -e "\t\t\t** $src_file does not exist"
echo -e "\t\t\t** I will not cluster variants per RAD locus"
echo -e "\t\t\t**************************************************************************"
echo $reset
clustering="false"
fi
else
#then tests if src is in the PATH env variable
src_file=$(command -v short_read_connector.sh)
if [ -n "$src_file" ]; then
echo "${yellow}short_read_connector path is $src_file$reset"
else
echo -e "${red}\t\t\t**************************************************************************"
echo -e "\t\t\t** WARNING: I cannot find short_read_connector in PATH. "
echo -e "\t\t\t** Try giving the absolute path of short_read_connector directory with option -S"
echo -e "\t\t\t** I will not cluster variants per RAD locus"
echo -e "\t\t\t**************************************************************************"
echo $reset
clustering="false"
fi
fi
fi
######### CHECK THE k PARITY ##########
rest=$(( $k % 2 ))
if [ $rest -eq 0 ]
......@@ -643,13 +657,13 @@ echo -e "\t######## CLUSTERING PER LOCUS AND/OR FORMATTING ###############"
echo -e "\t###############################################################$reset"
T="$(date +%s)"
if [ -f "$src_file" ]; then
if [[ "$clustering" == "true" ]]; then
if [[ "$wraith" == "false" ]]; then
echo "${yellow}Clustering and vcf formmatting$reset"
fi
final_output="${kissprefix}_clustered.vcf"
cmd="$EDIR/clustering_scripts/discoRAD_clustering.sh -f ${kissprefix}_raw.fa -s $short_read_connector_path -o ${final_output}"
echo $green$cmd$cyan
cmd="$EDIR/clustering_scripts/discoRAD_clustering.sh -f ${kissprefix}_raw.fa -s $src_file -o ${final_output}"
echo $green$cmd$cyan$reset
if [[ "$wraith" == "false" ]]; then
eval $cmd
fi
......@@ -668,7 +682,7 @@ else
fi
final_output="${kissprefix}.vcf"
cmd="python3 $EDIR/../scripts/create_filtered_vcf.py -i ${kissprefix}_raw.fa -o ${final_output} -m 0.95 -r 0.4"
echo $green$cmd$cyan
echo $green$cmd$cyan$reset
if [[ "$wraith" == "false" ]]; then
eval $cmd
fi
......