Skip to content
Commits on Source (9)
......@@ -12,7 +12,7 @@ cd build
echo "[Drone build] cmake configuration"
cmake -DDO_QUIET_MAKE=TRUE -DBOOST_ROOT=/usr ..
cmake -DDO_QUIET_MAKE=TRUE -DBOOST_ROOT=/usr -DNO_IPO=TRUE ..
echo "[Drone build] making salmon and installing locally (this could take a while)"
......
......@@ -32,7 +32,12 @@ install:
sudo apt-get install -qq "${CXX}"
fi
before_script:
- wget https://cmake.org/files/v3.15/cmake-3.15.2-Linux-x86_64.sh
- sudo sh cmake-3.15.2-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir
script:
- PATH=/usr/local/bin:$PATH
- LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/lib:$LD_LIBRARY_PATH
- mkdir build
- pushd build
......
......@@ -17,6 +17,8 @@ enable_testing()
project(Salmon)
option(USE_SHARED_LIBS "Use shared instead of static libraries" OFF)
# auto-populate version:
# from https://stackoverflow.com/questions/47066115/cmake-get-version-from-multi-line-text-file
file(READ "current_version.txt" ver)
......@@ -64,7 +66,7 @@ endif()
## Set the standard required compile flags
# Nov 18th --- removed -DHAVE_CONFIG_H
set(REMOVE_WARNING_FLAGS "-Wno-unused-function;-Wno-unused-local-typedefs")
set(TGT_COMPILE_FLAGS "-ftree-vectorize;-funroll-loops;-fPIC;-fomit-frame-pointer;-O3;-DNDEBUG")
set(TGT_COMPILE_FLAGS "-ftree-vectorize;-funroll-loops;-fPIC;-fomit-frame-pointer;-O3;-DNDEBUG;-DSTX_NO_STD_STRING_VIEW")
set(TGT_WARN_FLAGS "-Wall;-Wno-unknown-pragmas;-Wno-reorder;-Wno-unused-variable;-Wreturn-type;-Werror=return-type;${REMOVE_WARNING_FLAGS}")
......@@ -72,7 +74,9 @@ set(TGT_WARN_FLAGS "-Wall;-Wno-unknown-pragmas;-Wno-reorder;-Wno-unused-variable
# Sanitizers BEGIN
###
#list(APPEND TGT_COMPILE_FLAGS "-fsanitize=address")
#list(APPEND TGT_COMPILE_FLAGS "-fsanitize=undefined")
#set(CMAKE_LINK_FLAGS "-fsanitize=address")
#list(APPEND CMAKE_LINK_FLAGS "-fsanitize=undefined")
###
# Sanitizers END
###
......@@ -85,7 +89,10 @@ set(WARNING_IGNORE_FLAGS "")
endif()
## Prefer static to dynamic libraries
if(NOT USE_SHARED_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
set(MALLOC_STATIC_BUILD_FLAG "--enable-static")
endif()
include(CheckIPOSupported)
......@@ -127,7 +134,7 @@ if(APPLE)
# To allow ourselves to build a dynamic library, we have to tell the compiler
# that, yes, the symbols will be around at runtime.
list(APPEND TGT_COMPILE_FLAGS "-undefined dynamic_lookup;-Wno-unused-command-line-argument")
set(LIBSALMON_LINKER_FLAGS "-all_load")
# set(LIBSALMON_LINKER_FLAGS "-all_load")
# In order to "think different", we also have to use non-standard suffixes
# for our shared libraries
set(SHARED_LIB_EXTENSION "dylib")
......@@ -255,18 +262,20 @@ set(GAT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
###
#
# Grab RapMap sources for quasi-mapping code --- DURING CONFIGURE TIME!
# Grab pufferfish source --- DURING CONFIGURE TIME!
#
####
if(NOT FETCHED_RAPMAP)
exec_program(${CMAKE_CURRENT_SOURCE_DIR}/scripts/fetchRapMap.sh RETURN_VALUE FETCH_RM_SCRIPT_RET)
message(STATUS "fetch RAPMAP exit code ${FETCH_RM_SCRIPT_RET}")
if(NOT (FETCH_RM_SCRIPT_RET EQUAL 0))
message(FATAL_ERROR "Could not fetch RapMap source [fetchRapMap.sh returned exit code ${FETCH_RM_SCRIPT_RET}].")
if(NOT FETCHED_PUFFERFISH)
exec_program(${CMAKE_CURRENT_SOURCE_DIR}/scripts/fetchPufferfish.sh RETURN_VALUE FETCH_PF_SCRIPT_RET)
message(STATUS "fetch PUFFERFISH exit code ${FETCH_PF_SCRIPT_RET}")
if(NOT (FETCH_PF_SCRIPT_RET EQUAL 0))
message(FATAL_ERROR "Could not fetch pufferfish source [fetchPufferfish.sh returned exit code ${FETCH_PF_SCRIPT_RET}].")
endif()
set(FETCHED_RAPMAP TRUE CACHE BOOL "Has RapMap been fetched?" FORCE)
set(FETCHED_PUFFERFISH TRUE CACHE BOOL "Has pufferfish been fetched?" FORCE)
endif()
##
# Super-secret override
##
......@@ -280,12 +289,15 @@ endif()
##
if(CONDA_BUILD)
set(Boost_USE_STATIC_LIBS OFF)
elseif(USE_SHARED_LIBS) # CI failed when using an OR statement above...
set(Boost_USE_STATIC_LIBS OFF)
else()
set(Boost_USE_STATIC_LIBS ON)
endif()
set(Boost_USE_MULTITHREADED ON)
#set(Boost_USE_STATIC_RUNTIME OFF)
set(Boost_USE_DEBUG_RUNTIME OFF)
find_package(ZLIB)
if(NOT ZLIB_FOUND)
......@@ -304,6 +316,11 @@ else()
set(EXTRA_CMAKE_LIBRARY_FLAGS "-L${CMAKE_LIBRARY_PATH}")
endif()
find_package(Iconv REQUIRED)
if(NOT Iconv_IS_BUILT_IN)
set(ICONV_LIB Iconv::Iconv)
endif()
find_package(LibLZMA)
if(NOT LIBLZMA_FOUND)
message("Will attempt to fetch and build liblzma")
......@@ -342,7 +359,7 @@ if(NOT BZIP2_FOUND)
message("=======================================")
externalproject_add(libbz2
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz -o bzip2-1.0.6.tar.gz &&
DOWNLOAD_COMMAND curl -k -L https://sourceware.org/pub/bzip2/bzip2-1.0.6.tar.gz -o bzip2-1.0.6.tar.gz &&
${SHASUM} a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd bzip2-1.0.6.tar.gz &&
tar -xzvf bzip2-1.0.6.tar.gz
#URL http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz
......@@ -368,11 +385,14 @@ endif()
##
# Set the latest version and look for what we need
##
set(Boost_ADDITIONAL_VERSIONS "1.59.0" "1.60.0" "1.61.0" "1.62" "1.63" "1.64" "1.65" "1.66")
set(Boost_ADDITIONAL_VERSIONS "1.59.0" "1.60.0" "1.61.0" "1.62.0" "1.63.0" "1.64.0" "1.65.0" "1.66.0" "1.67.0" "1.68.0" "1.69.0" "1.70.0" "1.71.0")
if (NOT BOOST_RECONFIGURE)
find_package(Boost 1.59.0 COMPONENTS iostreams filesystem system timer chrono program_options)
message("BOOST_INCLUDEDIR = ${BOOST_INCLUDEDIR}")
message("BOOST_LIBRARYDIR = ${BOOST_LIBRARYDIR}")
message("Boost_FOUND = ${Boost_FOUND}")
endif()
include(ExternalProject)
##
......@@ -389,7 +409,9 @@ if(BOOST_RECONFIGURE)
unset(Boost_LIBRARIES CACHE)
unset(BOOST_ROOT CACHE)
unset(CMAKE_PREFIX_PATH CACHE)
unset(Boost::diagnostic_definitions CACHE)
unset(Boost::disable_autolinking CACHE)
unset(Boost::dynamic_linking CACHE)
set(BOOST_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(Boost_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
......@@ -412,6 +434,10 @@ if((NOT Boost_FOUND) AND (NOT FETCH_BOOST))
"To fetch and build boost locally, call cmake with -DFETCH_BOOST=TRUE"
)
elseif(FETCH_BOOST)
if(NOT DEFINED BOOST_BUILD_THREADS)
set(BOOST_BUILD_THREADS 2)
endif()
## Let the rest of the build process know we're going to be fetching boost
set(BOOST_LIB_SUBSET --with-iostreams --with-atomic --with-chrono --with-container --with-date_time --with-exception
--with-filesystem --with-graph --with-graph_parallel --with-math
......@@ -423,19 +449,19 @@ elseif(FETCH_BOOST)
message("==================================================================")
externalproject_add(libboost
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L http://downloads.sourceforge.net/project/boost/boost/1.66.0/boost_1_66_0.tar.gz -o boost_1_66_0.tar.gz &&
${SHASUM} bd0df411efd9a585e5a2212275f8762079fed8842264954675a4fddc46cfcf60 boost_1_66_0.tar.gz &&
tar xzf boost_1_66_0.tar.gz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_66_0
DOWNLOAD_COMMAND curl -k -L https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz -o boost_1_71_0.tar.gz &&
${SHASUM} 96b34f7468f26a141f6020efb813f1a2f3dfb9797ecf76a7d7cbd843cc95f5bd boost_1_71_0.tar.gz &&
tar xzf boost_1_71_0.tar.gz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_71_0
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
#PATCH_COMMAND patch -p2 < ${CMAKE_CURRENT_SOURCE_DIR}/external/boost156.patch
CONFIGURE_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_66_0/bootstrap.sh ${BOOST_CONFIGURE_TOOLSET} ${BOOST_BUILD_LIBS} --prefix=<INSTALL_DIR>
CONFIGURE_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_71_0/bootstrap.sh ${BOOST_CONFIGURE_TOOLSET} ${BOOST_BUILD_LIBS} --prefix=<INSTALL_DIR>
add_custom_command(
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_66_0/tools/build/src/user-config.jam
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_71_0/tools/build/src/user-config.jam
PRE_BUILD
COMMAND echo "using gcc : ${CC_VERSION} : ${CMAKE_CXX_COMPILER} ;"
)
BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_66_0/b2 -d0 -j2 ${BOOST_LIB_SUBSET} toolset=${BOOST_TOOLSET} ${BOOST_EXTRA_FLAGS} cxxflags=${BOOST_CXX_FLAGS} link=static install
BUILD_COMMAND CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} ${CMAKE_CURRENT_SOURCE_DIR}/external/boost_1_71_0/b2 -d0 -j${BOOST_BUILD_THREADS} ${BOOST_LIB_SUBSET} toolset=${BOOST_TOOLSET} ${BOOST_EXTRA_FLAGS} cxxflags=${BOOST_CXX_FLAGS} link=static install
BUILD_IN_SOURCE 1
INSTALL_COMMAND ""
)
......@@ -477,28 +503,41 @@ message("BOOST LIBRARIES = ${Boost_LIBRARIES}")
set(EXTERNAL_LIBRARY_PATH $CMAKE_CURRENT_SOURCE_DIR/lib)
message("Build system will build libdivsufsort")
message("==================================================================")
include(ExternalProject)
externalproject_add(libdivsufsort
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
URL ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort.zip
# Note: This zip comes from the fetched rapmap.zip, whose SHA we check
# so we shouldn't need to check this one separately.
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort-master
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
#UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort-master/build
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR> -DBUILD_DIVSUFSORT64=TRUE -DUSE_OPENMP=TRUE -DBUILD_SHARED_LIBS=FALSE
)
externalproject_add_step(libdivsufsort makedir
COMMAND mkdir -p <SOURCE_DIR>/build
COMMENT "Make build directory"
DEPENDEES download
DEPENDERS configure)
#find_package(libdivsufsort)
#if(NOT LIBDIVSUFSORT_FOUND)
# message("Build system will build libdivsufsort")
# message("==================================================================")
# include(ExternalProject)
# externalproject_add(libdivsufsort
# DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
# URL ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort.zip
# # Note: This zip comes from the fetched rapmap.zip, whose SHA we check
# # so we shouldn't need to check this one separately.
# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort-master
# INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
# #UPDATE_COMMAND sh -c "mkdir -p <SOURCE_DIR>/build"
# BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/libdivsufsort-master/build
# CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR> -DBUILD_DIVSUFSORT64=TRUE -DUSE_OPENMP=TRUE -DBUILD_SHARED_LIBS=FALSE
# )
# externalproject_add_step(libdivsufsort makedir
# COMMAND mkdir -p <SOURCE_DIR>/build
# COMMENT "Make build directory"
# DEPENDEES download
# DEPENDERS configure)
#
# set(SUFFARRAY_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
# set(FETCHED_LIBDIVSUFSORT TRUE)
#else()
# message("SUFFARRAY_LIB = ${SUFFARRAY_LIBRARY}")
# set(SUFFARRAY_LIB ${SUFFARRAY_LIBRARY})
# message("SUFFARRAY_LIB64 = ${SUFFARRAY_LIBRARY64}")
# set(SUFFARRAY_LIB64 ${SUFFARRAY_LIBRARY64})
# set(SUFFARRAY_INCLUDE_DIRS ${SUFFARRAY_INCLUDE_DIR})
#endif()
set(SUFFARRAY_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
find_package(Cereal)
if (NOT CEREAL_FOUND)
message("Build system will fetch and build the Cereal serialization library")
message("==================================================================")
include(ExternalProject)
......@@ -528,15 +567,29 @@ externalproject_add_step(libcereal makedir
DEPENDEES download
DEPENDERS configure)
set(FETCHED_CEREAL TRUE)
endif()
## Try and find TBB first
find_package(TBB 2018.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy)
if (${TBB_FOUND})
if (${TBB_VERSION} VERSION_GREATER_EQUAL 2018.0)
message("FOUND SUITABLE TBB VERSION : ${TBB_VERSION}")
set(TBB_TARGET_EXISTED TRUE)
else()
set(TBB_TARGET_EXISTED FALSE)
endif()
else()
set(TBB_TARGET_EXISTED FALSE)
endif()
##
#
# Fetch and build Intel's Threading Building Blocks library.
#
##
if((NOT TBB_FOUND) OR (TBB_FOUND AND (TBB_VERSION VERSION_LESS 2018.0)))
if(NOT ${TBB_TARGET_EXISTED})
set(TBB_WILL_RECONFIGURE TRUE)
# Set the appropriate compiler
......@@ -549,7 +602,7 @@ endif()
message("Build system will fetch and build Intel Threading Building Blocks")
message("==================================================================")
# These are useful for the custom install step we'll do later
set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb-2018_U3)
set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb-2019_U8)
set(TBB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
if("${TBB_COMPILER}" STREQUAL "gcc")
......@@ -562,16 +615,10 @@ set(TBB_CXXFLAGS "${TBB_CXXFLAGS} ${CXXSTDFLAG}")
externalproject_add(libtbb
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/01org/tbb/archive/2018_U3.tar.gz -o tbb-2018_U3.tgz &&
${SHASUM} 23793c8645480148e9559df96b386b780f92194c80120acce79fcdaae0d81f45 tbb-2018_U3.tgz &&
tar -xzvf tbb-2018_U3.tgz
##
#URL https://github.com/01org/tbb/archive/2018_U3.tar.gz
#DOWNLOAD_NAME 2018_U3.tar.gz
#URL_HASH SHA1=d6cf16a42ece60aad6a722b369e1a2aa753347b4
#TLS_VERIFY FALSE
##
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb-2018_U3
DOWNLOAD_COMMAND curl -k -L https://github.com/intel/tbb/archive/2019_U8.tar.gz -o tbb-2019_U8.tgz &&
${SHASUM} 7b1fd8caea14be72ae4175896510bf99c809cd7031306a1917565e6de7382fba tbb-2019_U8.tgz &&
tar -xzvf tbb-2019_U8.tgz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb-2019_U8
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
PATCH_COMMAND "${TBB_PATCH_STEP}"
CONFIGURE_COMMAND ""
......@@ -580,6 +627,9 @@ externalproject_add(libtbb
BUILD_IN_SOURCE 1
)
set(RECONFIG_FLAGS ${RECONFIG_FLAGS} -DTBB_WILL_RECONFIGURE=FALSE -DTBB_RECONFIGURE=TRUE)
externalproject_add_step(libtbb reconfigure
COMMAND ${CMAKE_COMMAND} ${CMAKE_CURRENT_SOURCE_DIR} ${RECONFIG_FLAGS}
......@@ -597,12 +647,18 @@ endif()
# so that CMake won't complain
##
if(TBB_WILL_RECONFIGURE)
set(TBB_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
set(TBB_LIBRARY_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib)
set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(TBB_INCLUDE_DIRS ${TBB_INSTALL_DIR}/include)
set(TBB_INCLUDE_DIR ${TBB_INSTALL_DIR}/include)
set(TBB_LIBRARY_DIRS ${TBB_INSTALL_DIR}/lib)
set(TBB_LIBRARY ${TBB_INSTALL_DIR}/lib)
set(TBB_LIB_DIR ${TBB_INSTALL_DIR}/lib)
#set(TBB_LIBRARIES tbb tbbmalloc)
set(TBB_LIBRARIES ${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib/libtbb.${SHARED_LIB_EXTENSION}
${CMAKE_CURRENT_SOURCE_DIR}/external/install/lib/libtbbmalloc.${SHARED_LIB_EXTENSION}
set(TBB_LIBRARIES ${TBB_INSTALL_DIR}/lib/libtbb.${SHARED_LIB_EXTENSION}
${TBB_INSTALL_DIR}/lib/libtbbmalloc.${SHARED_LIB_EXTENSION}
)
message("TBB_INCLUDE_DIRS = ${TBB_INCLUDE_DIRS}")
message("TBB_LIBRARY_DIRS = ${TBB_LIBRARY_DIRS}")
endif()
##
......@@ -614,15 +670,34 @@ if(TBB_RECONFIGURE)
unset(TBB_FOUND CACHE)
unset(TBB_INSTALL_DIR CACHE)
unset(CMAKE_PREFIX_PATH CACHE)
unset(TBB_INCLUDE_DIRS CACHE)
unset(TBB_INCLUDE_DIR CACHE)
unset(TBB_LIBRARY_DIRS CACHE)
unset(TBB_LIBRARY CACHE)
unset(TBB_LIBRARIES CACHE)
set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(TBB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(TBB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install)
set(TBB_INCLUDE_DIRS ${TBB_INSTALL_DIR}/include)
set(TBB_INCLUDE_DIR ${TBB_INSTALL_DIR}/include)
set(TBB_LIBRARY_DIRS ${TBB_INSTALL_DIR}/lib)
set(TBB_LIBRARY ${TBB_INSTALL_DIR}/lib)
set(TBB_LIB_DIR ${TBB_INSTALL_DIR}/lib)
message("TBB_INSTALL_DIR = ${TBB_INSTALL_DIR}")
find_package(TBB)
find_package(TBB 2018.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy)
message("[in TBB_RECONFIGURE] TBB_LIBRARIES = ${TBB_LIBRARIES}")
endif()
message("TBB_LIBRARIES = ${TBB_LIBRARIES}")
#message("TBB_FOUND ${TBB_FOUND} ")
#message("TBB_INSTALL_DIR ${TBB_INSTALL_DIR}")
#message("TBB_INCLUDE_DIRS ${TBB_INCLUDE_DIRS}")
#message("TBB_INCLUDE_DIR ${TBB_INCLUDE_DIR} ")
#message("TBB_LIBRARY_DIRS ${TBB_LIBRARY_DIRS}")
#message("TBB_LIBRARIES ${TBB_LIBRARIES} ")
find_package(libgff)
if(NOT LIBGFF_FOUND)
message("Build system will compile libgff")
message("==================================================================")
externalproject_add(libgff
......@@ -647,6 +722,8 @@ externalproject_add_step(libgff makedir
COMMENT "Make build directory"
DEPENDEES download
DEPENDERS configure)
set(FETCHED_GFF TRUE)
endif()
# Because of the way that Apple has changed SIP
# in el capitan, some headers may be in a new location
......@@ -661,6 +738,9 @@ else()
set(LZFLAG "")
endif()
find_package(CURL)
find_package(libstadenio)
if (NOT LIBSTADENIO_FOUND)
message("Build system will compile Staden IOLib")
message("==================================================================")
externalproject_add(libstadenio
......@@ -680,6 +760,13 @@ externalproject_add(libstadenio
BUILD_IN_SOURCE 1
INSTALL_COMMAND make install
)
if(NOT LIBLZMA_FOUND)
ExternalProject_Add_StepDependencies(libstadenio build liblzma)
endif()
set(FETCHED_STADEN TRUE)
set(STADEN_LIBRARIES "${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a")
endif()
set(FAST_MALLOC_LIB "")
set(HAVE_FAST_MALLOC FALSE)
......@@ -690,12 +777,12 @@ if(Jemalloc_FOUND)
##
# Don't be so stringent about the version yet
##
#if (NOT (${JEMALLOC_VERSION} VERSION_LESS 5.1.0))
#if (NOT (${JEMALLOC_VERSION} VERSION_LESS 5.2.1))
message("Found Jemalloc library --- using this memory allocator")
set(FAST_MALLOC_LIB ${JEMALLOC_LIBRARIES})
set(HAVE_FAST_MALLOC TRUE)
#else()
# message("Found Jemalloc version ${JEMALLOC_VERSION}, but require >= 5.1.0. Downloading newer version")
# message("Found Jemalloc version ${JEMALLOC_VERSION}, but require >= 5.2.1. Downloading newer version")
#endif()
endif()
......@@ -710,14 +797,14 @@ if(NOT HAVE_FAST_MALLOC)
message("==================================================================")
externalproject_add(libjemalloc
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/jemalloc/archive/5.1.0.tar.gz -o jemalloc-5.1.0.tar.gz &&
${SHASUM} ff28aef89df724bd7b6bd6fde8597695514e0e3404d1afad2f1eb8b55ef378d3 jemalloc-5.1.0.tar.gz &&
tar -xzf jemalloc-5.1.0.tar.gz
DOWNLOAD_COMMAND curl -k -L https://github.com/jemalloc/jemalloc/archive/5.2.1.tar.gz -o jemalloc-5.2.1.tar.gz &&
${SHASUM} ed51b0b37098af4ca6ed31c22324635263f8ad6471889e0592a9c0dba9136aea jemalloc-5.2.1.tar.gz &&
tar -xzf jemalloc-5.2.1.tar.gz
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jemalloc-5.1.0
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jemalloc-5.2.1
BUILD_IN_SOURCE TRUE
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND sh -c "${JEMALLOC_FLAGS} ./autogen.sh --disable-debug --prefix=<INSTALL_DIR>"
CONFIGURE_COMMAND sh -c "${JEMALLOC_FLAGS} ./autogen.sh --disable-debug ${MALLOC_STATIC_BUILD_FLAG} --prefix=<INSTALL_DIR>"
INSTALL_COMMAND cp -r lib <INSTALL_DIR>/ && cp -r include <INSTALL_DIR>/
)
......@@ -764,8 +851,14 @@ set(CPACK_SOURCE_IGNORE_FILES
message("CPACK_SOURCE_IGNORE_FILES = ${CPACK_SOURCE_IGNORE_FILES}")
# Recurse into Salmon source directory
# Recurse into pufferfish source directory
# and build the library
set(BUILD_PUFF_FOR_SALMON TRUE)
add_subdirectory(external/pufferfish)
# and then the main salmon source directory
add_subdirectory(src)
#add_dependencies(salmon RapMap)
# build a CPack driven installer package
include(CPack)
......
......@@ -2,19 +2,26 @@
[![Documentation Status](https://readthedocs.org/projects/salmon/badge/?version=latest)](http://salmon.readthedocs.org/en/latest)
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/salmon/README.html)
**Try out alevin (salmon's single-cell processing module)! Get started with the [tutorial](https://combine-lab.github.io/alevin-tutorial/#blog)**
**Help guide the development of Salmon, [take our survey](https://docs.google.com/forms/d/e/1FAIpQLSeWhBNE_fA_0uVHvbAlAulDmfmowv7rAYla879DZpqCARyRTQ/viewform)**
### Pre-computed decoy transcriptomes
Although the precomputed decoys (<= v0.14.2) are still compatible with the latest major release (v1.0.0), we highly recommend updating your index using the full genome, as it gives significantly higher accuracy. For more information, please check our extensive benchmarking comparing different alignment methods and their performance on RNA-seq quantification in the latest revised preprint [manuscript](https://www.biorxiv.org/content/10.1101/657874v2). tl;dr: fast is good, but fast and accurate is better!
Please use the [tutorial](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) for a step-by-step guide on how to efficiently index the reference transcriptome and genome for accurate gentrome based RNA-seq quantification.
**Facing problems with indexing? [Check whether anyone else has already reported this problem in the issues section, or fill out the index generation request form](https://forms.gle/3baJc5SYrkSWb1z48)**
What is Salmon?
===============
Salmon is a **wicked**-fast program to produce highly-accurate, transcript-level quantification estimates from
RNA-seq data. Salmon achieves its accuracy and speed via a number of different innovations, including the
use of *quasi-mapping* (accurate but fast-to-compute proxies for traditional read alignments), and
use of *selective-alignment* (accurate but fast-to-compute proxies for traditional read alignments), and
massively-parallel stochastic collapsed variational inference. The result is a versatile tool that fits nicely
into many different pipelines. For example, you can choose to make use of our *quasi-mapping* algorithm by providing Salmon with raw sequencing reads, or, if it is more convenient, you can provide Salmon with regular alignments (e.g. an **unsorted** BAM file produced with your favorite aligner), and it will use the same **wicked**-fast, state-of-the-art inference algorithm
to estimate transcript-level abundances for your experiment.
into many different pipelines. For example, you can choose to make use of our *selective-alignment* algorithm by providing Salmon with raw sequencing reads, or, if it is more convenient, you can provide Salmon with regular alignments (e.g. an **unsorted** BAM file with alignments to the transcriptome produced with your favorite aligner), and it will use the same **wicked**-fast, state-of-the-art inference algorithm to estimate transcript-level abundances for your experiment.
Give salmon a try! You can find the latest binary releases [here](https://github.com/COMBINE-lab/salmon/releases).
......@@ -27,6 +34,9 @@ Documentation
The documentation for Salmon is available on [ReadTheDocs](http://readthedocs.org), check it out [here](http://salmon.readthedocs.org).
Salmon is, and will continue to be, [freely and actively supported on a best-effort basis](https://oceangenomics.com/about/#open).
If you need industrial-grade technical support, please consider the options at [oceangenomics.com/support](http://oceangenomics.com/support).
Chat live about Salmon
======================
......
###############################################################################
# Find Cereal
#
# This sets the following variables:
# CEREAL_FOUND - True if Cereal was found.
# CEREAL_INCLUDE_DIRS - Directories containing the Cereal include files.
# CEREAL_DEFINITIONS - Compiler flags for Cereal.
find_path(CEREAL_INCLUDE_DIR cereal
HINTS "${CEREAL_ROOT}/include" "$ENV{CEREAL_ROOT}/include" "/usr/include" "$ENV{PROGRAMFILES}/cereal/include")
set(CEREAL_INCLUDE_DIRS ${CEREAL_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Cereal DEFAULT_MSG CEREAL_INCLUDE_DIR)
mark_as_advanced(CEREAL_INCLUDE_DIR)
if(CEREAL_FOUND)
message(STATUS "Cereal found (include: ${CEREAL_INCLUDE_DIRS})")
endif(CEREAL_FOUND)
find_path(SUFFARRAY_INCLUDE_DIR divsufsort64.h
HINTS ${SUFFARRAY_ROOT} ENV SUFFARRAY_ROOT
PATH_SUFFIXES include)
find_library(SUFFARRAY_LIBRARY NAMES divsufsort divsufsort64 libdivsufsort libdivsufsort64
HINTS ${SUFFARRAY_ROOT} ENV SUFFARRAY_ROOT PATH_SUFFIXES lib lib64)
find_library(SUFFARRAY_LIBRARY64 NAMES divsufsort64 libdivsufsort64
HINTS ${SUFFARRAY_ROOT} ENV SUFFARRAY_ROOT PATH_SUFFIXES lib lib64)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(libdivsufsort DEFAULT_MSG
SUFFARRAY_LIBRARY
SUFFARRAY_LIBRARY64
SUFFARRAY_INCLUDE_DIR)
mark_as_advanced(SUFFARRAY_INCLUDE_DIR SUFFARRAY_LIBRARY)
find_path(GFF_INCLUDE_DIR gff.h
HINTS ${GFF_ROOT} ENV GFF_ROOT
PATH_SUFFIXES include)
find_library(GFF_LIBRARY NAMES gff libgff
HINTS ${GFF_ROOT} ENV GFF_ROOT PATH_SUFFIXES lib lib64)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(libgff DEFAULT_MSG
GFF_INCLUDE_DIR GFF_LIBRARY)
mark_as_advanced(GFF_INCLUDE_DIR GFF_LIBRARY)
###############################################################################
# Find Staden IOLib
#
# This sets the following variables:
# STADEN_FOUND - True if Staden IOLib was found.
# STADEN_INCLUDE_DIR - Header files.
# STADEN_LIBRARIES - Staden IOLib library.
find_path(STADEN_INCLUDE_DIR io_lib
HINTS ${STADEN_ROOT} ENV STADEN_ROOT
PATH_SUFFIXES include)
find_library(STADEN_LIBRARY NAMES staden-read libstaden-read
HINTS ${STADEN_ROOT} ENV STADEN_ROOT PATH_SUFFIXES lib lib64)
if(STADEN_INCLUDE_DIR)
set(_version_regex "^#define[ \t]+PACKAGE_VERSION[ \t]+\"([^\"]+)\".*")
file(STRINGS "${STADEN_INCLUDE_DIR}/io_lib/io_lib_config.h"
STADEN_VERSION REGEX "${_version_regex}")
string(REGEX REPLACE "${_version_regex}" "\\1"
STADEN_VERSION "${STADEN_VERSION}")
unset(_version_regex)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(libstadenio DEFAULT_MSG
STADEN_LIBRARY
STADEN_INCLUDE_DIR
STADEN_VERSION)
if (LIBSTADENIO_FOUND)
message(STATUS "Staden IOLib found (include: ${STADEN_INCLUDE_DIR})")
set(STADEN_LIBRARIES ${STADEN_LIBRARY})
endif()
mark_as_advanced(STADEN_INCLUDE_DIR STADEN_LIBRARY)
......@@ -7,7 +7,7 @@ if (TAR_RESULT)
message(FATAL_ERROR "Error untarring sample_data.tgz")
endif()
set(SALMON_QUASI_INDEX_CMD ${CMAKE_BINARY_DIR}/salmon index -t transcripts.fasta -i sample_salmon_quasi_index --type quasi)
set(SALMON_QUASI_INDEX_CMD ${CMAKE_BINARY_DIR}/salmon index -t transcripts.fasta -i sample_salmon_quasi_index)
execute_process(COMMAND ${SALMON_QUASI_INDEX_CMD}
WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data
RESULT_VARIABLE SALMON_QUASI_INDEX_RESULT
......
VERSION_MAJOR 0
VERSION_MINOR 12
VERSION_MAJOR 1
VERSION_MINOR 0
VERSION_PATCH 0
salmon (1.0.0+ds1-1) UNRELEASED; urgency=medium
* New upstream version
* debhelper-compat 12
* Standards-Version: 4.4.0
* TODO: package pufferfish
-- Michael R. Crusoe <michael.crusoe@gmail.com> Thu, 14 Nov 2019 14:52:13 +0100
salmon (0.12.0+ds1-1) unstable; urgency=medium
* Team upload.
......
Source: salmon
Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.org>
Uploaders: Michael R. Crusoe <crusoe@ucdavis.edu>,
Uploaders: Michael R. Crusoe <michael.crusoe@gmail.com>,
Kevin Murray <spam@kdmurray.id.au>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
Build-Depends: debhelper-compat (= 12),
cmake,
libboost-filesystem-dev,
libboost-system-dev,
......@@ -33,8 +33,8 @@ Build-Depends: debhelper (>= 11~),
libeigen3-dev,
libbz2-dev,
liblzma-dev,
rapmap-dev (>= 0.12.0~)
Standards-Version: 4.2.1
rapmap-dev
Standards-Version: 4.4.0
Vcs-Browser: https://salsa.debian.org/med-team/salmon
Vcs-Git: https://salsa.debian.org/med-team/salmon.git
Homepage: https://github.com/COMBINE-lab/salmon
......
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: salmon
Source: https://github.com/COMBINE-lab/salmon/
Files-Excluded: doc/build/*
include/CollapsedIterativeOptimizer.hpp.bak
src/SalmonQuantify.cpp.bak
include/eigen3
include/obsolete
Files-Excluded: include/eigen3
include/kseq.h
include/posix.h
include/tensemble/Tree.h.backup.h
src/posix.cc
cmake/Modules/FindTBB.cmake
scripts/cpld.bash
......
Author: Michael R. Crusoe <crusoe@ucdavis.edu>
Description: Disable the newer version check
--- a/src/Salmon.cpp
+++ b/src/Salmon.cpp
@@ -208,10 +208,10 @@ int main(int argc, char* argv[]) {
--- salmon.orig/src/Salmon.cpp
+++ salmon/src/Salmon.cpp
@@ -209,10 +209,10 @@
std::exit(0);
}
- if (!vm.count("no-version-check")) {
- std::string versionMessage = getVersionMessage();
- std::cout << versionMessage;
- std::cerr << versionMessage;
- }
+ // if (!vm.count("no-version-check")) {
+ // std::string versionMessage = getVersionMessage();
+ // std::cout << versionMessage;
+ // std::cerr << versionMessage;
+ // }
// po::notify(vm);
......
......@@ -2,86 +2,19 @@ Author: Andreas Tille <tille@debian.org>
Last-Update: Tue, 11 Dec 2018 19:29:39 +0100
Description: Use Debian packaged libraries and make sure these are linked dynamically
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -477,6 +477,7 @@ message("BOOST LIBRARIES = ${Boost_LIBRA
set(EXTERNAL_LIBRARY_PATH $CMAKE_CURRENT_SOURCE_DIR/lib)
+if(!DEBIAN_BUILD)
message("Build system will build libdivsufsort")
message("==================================================================")
include(ExternalProject)
@@ -498,7 +499,13 @@ externalproject_add_step(libdivsufsort m
DEPENDERS configure)
set(SUFFARRAY_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/external/install/include)
+else()
+message("Use Debian packaged libdivsufsort")
+message("=================================")
+add_custom_target(libdivsufsort)
+endif()
+if(!DEBIAN_BUILD)
message("Build system will fetch and build the Cereal serialization library")
message("==================================================================")
include(ExternalProject)
@@ -527,10 +534,15 @@ externalproject_add_step(libcereal maked
COMMENT "Make build directory"
DEPENDEES download
DEPENDERS configure)
+else()
+add_custom_target(libcereal)
+endif()
--- salmon.orig/CMakeLists.txt
+++ salmon/CMakeLists.txt
@@ -572,6 +572,8 @@
## Try and find TBB first
find_package(TBB 2018.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy)
+# Manually set version since I have no idea how to get this from TBB cmake config
+set(TBB_VERSION "2018.0")
##
#
# Fetch and build Intel's Threading Building Blocks library.
@@ -623,6 +635,7 @@ endif()
message("TBB_LIBRARIES = ${TBB_LIBRARIES}")
+if(!DEBIAN_BUILD)
message("Build system will compile libgff")
message("==================================================================")
externalproject_add(libgff
@@ -647,6 +660,9 @@ externalproject_add_step(libgff makedir
COMMENT "Make build directory"
DEPENDEES download
DEPENDERS configure)
+else()
+add_custom_target(libgff)
+endif()
# Because of the way that Apple has changed SIP
# in el capitan, some headers may be in a new location
@@ -661,6 +677,7 @@ else()
set(LZFLAG "")
endif()
+set(TBB_VERSION "2019.0")
+if(!DEBIAN_BUILD)
message("Build system will compile Staden IOLib")
message("==================================================================")
externalproject_add(libstadenio
@@ -680,6 +697,11 @@ externalproject_add(libstadenio
BUILD_IN_SOURCE 1
INSTALL_COMMAND make install
)
+else()
+message("Use Debian packaged Staden IOLib")
+message("================================")
+add_custom_target(libstadenio)
+endif()
set(FAST_MALLOC_LIB "")
set(HAVE_FAST_MALLOC FALSE)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
if (${TBB_FOUND})
if (${TBB_VERSION} VERSION_GREATER_EQUAL 2018.0)
--- salmon.orig/src/CMakeLists.txt
+++ salmon/src/CMakeLists.txt
@@ -1,6 +1,6 @@
include_directories(
${GAT_SOURCE_DIR}/include
......@@ -90,10 +23,10 @@ Description: Use Debian packaged libraries and make sure these are linked dynami
${GAT_SOURCE_DIR}/external
${GAT_SOURCE_DIR}/external/cereal/include
${GAT_SOURCE_DIR}/external/install/include
@@ -8,8 +8,8 @@ ${GAT_SOURCE_DIR}/external/install/inclu
${ZLIB_INCLUDE_DIR}
${TBB_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
@@ -11,8 +11,8 @@
${GAT_SOURCE_DIR}/external/install/include/pufferfish
${GAT_SOURCE_DIR}/external/install/include/pufferfish/digestpp
-${GAT_SOURCE_DIR}/external/install/include/rapmap
-${GAT_SOURCE_DIR}/external/install/include/rapmap/digestpp
+/usr/include/rapmap
......@@ -101,30 +34,18 @@ Description: Use Debian packaged libraries and make sure these are linked dynami
${ICU_INC_DIRS}
)
@@ -199,8 +199,8 @@ add_executable(unitTests ${UNIT_TESTS_SR
#set_target_properties(salmon_core salmon PROPERTIES LINK_SEARCH_END_STATIC TRUE)
# our suffix array construction libraries
-set (SUFFARRAY_LIB ${GAT_SOURCE_DIR}/external/install/lib/libdivsufsort.a)
-set (SUFFARRAY64_LIB ${GAT_SOURCE_DIR}/external/install/lib/libdivsufsort64.a)
+set (SUFFARRAY_LIB divsufsort)
+set (SUFFARRAY64_LIB divsufsort64)
# Link the executable
@@ -209,20 +209,20 @@ target_link_libraries(salmon
salmon_core
alevin_core
@@ -234,20 +234,20 @@
graphdump
ntcard
gff
- ${Boost_LIBRARIES}
+ boost_iostreams boost_filesystem boost_system boost_timer boost_chrono boost_program_options boost_regex
${ICU_LIBS}
- ${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a
${STADEN_LIBRARIES} ${CURL_LIBRARIES}
- ${ZLIB_LIBRARY}
+ staden-read
+ z
${SUFFARRAY_LIB}
${SUFFARRAY64_LIB}
#${SUFFARRAY_LIB}
#${SUFFARRAY_LIB64}
#${GAT_SOURCE_DIR}/external/install/lib/libbwa.a
m
- ${LIBLZMA_LIBRARIES}
......@@ -138,21 +59,17 @@ Description: Use Debian packaged libraries and make sure these are linked dynami
+ jemalloc
${LIBRT}
ksw2pp
${CMAKE_DL_LIBS}
@@ -234,17 +234,17 @@ target_link_libraries(unitTests
## PUFF_INTEGRATION
@@ -265,14 +265,14 @@
salmon_core
alevin_core
gff
${STADEN_LIBRARIES}
- ${Boost_LIBRARIES}
+ boost_iostreams boost_filesystem boost_system boost_timer boost_chrono boost_program_options boost_regex
${ICU_LIBS}
- ${GAT_SOURCE_DIR}/external/install/lib/libstaden-read.a
${CURL_LIBRARIES}
- ${ZLIB_LIBRARY}
+ staden-read
+ z
${SUFFARRAY_LIB}
${SUFFARRAY64_LIB}
#${GAT_SOURCE_DIR}/external/install/lib/libbwa.a
m
- ${LIBLZMA_LIBRARIES}
+ lzma
......
......@@ -4,7 +4,7 @@ Alevin
Alevin is a tool --- integrated with the salmon software --- that introduces a family of algorithms for quantification and analysis of 3' tagged-end single-cell sequencing data. Currently alevin supports the following two major droplet based single-cell protocols:
1. Drop-seq
2. 10x-Chromium v1/2
2. 10x-Chromium v1/2/3
Alevin works under the same indexing scheme (as salmon) for the reference, and consumes the set of FASTA/Q file(s) containing the Cellular Barcode(CB) + Unique Molecular Identifier (UMI) in one read file and the read sequence in the other. Given just the transcriptome and the raw read files, alevin generates a cell-by-gene count matrix (in a fraction of the time compared to other tools).
......@@ -18,7 +18,7 @@ Alevin requires the following minimal set of necessary input parameters (general
* ``-l``: library type (same as salmon), we recommend using `ISR` for both Drop-seq and 10x-v2 chemistry.
* ``-1``: CB+UMI file(s), alevin requires the path to the *FASTQ* file containing CB+UMI raw sequences to be given under this command line flag. Alevin also supports parsing of data from multiple files as long as the order is the same as in `-2` flag.
* ``-2``: Read-sequence file(s), alevin requires the path to the *FASTQ* file containing raw read-sequences to be given under this command line flag. Alevin also supports parsing of data from multiple files as long as the order is the same as in `-1` flag.
* ``--dropseq / --chromium``: the protocol, this flag tells the type of single-cell protocol of the input sequencing-library.
* ``--dropseq / --chromium / --chromiumV3``: the protocol, this flag tells the type of single-cell protocol of the input sequencing-library.
* ``-i``: index, file containing the salmon index of the reference transcriptome, as generated by `salmon index` command.
* ``-p``: number of threads, the number of threads which can be used by alevin to perform the quantification, by default alevin utilizes *all* the available threads in the system, although we recommend using ~10 threads which in our testing gave the best memory-time trade-off.
* ``-o``: output, path to folder where the output gene-count matrix (along with other meta-data) would be dumped.
......@@ -105,18 +105,14 @@ If Alevin is passed the ``--noDedup`` option, the pipeline only performs CB corr
The list of mitochondrial genes which are to be used as a feature for CB whitelisting naive Bayes classification.
.. note:: It is generally advisable to not use nuclear mitochondrial genes in this as they can be both up and/or down regulated which might cancel out the usefulness of this feature. Please check issue `#367 <https://github.com/COMBINE-lab/salmon/issues/367>`_ in the salmon repo to learn more about it.
""""""""""""
``--rrna``
""""""""""""
The list of ribosomal genes which are to be used as a feature for CB whitelisting naive Bayes classification.
""""""""""""
``--useCorrelation``
""""""""""""
If activated, in CB whitelist classification alevin computes the cell-by-cell pearson correlation of each candidate CB with putative true set of CB. This flag can slow down alevin's processing.
""""""""""""
``--dumpfq``
""""""""""""
......@@ -134,13 +130,13 @@ Alevin internally uses a potentially big data-structure to concisely maintain al
``--dumpFeatures``
""""""""""""
If activated, alevin dumps all the features used by the CB classification and their counts at each cell level. Generally, this is used for the purposes of debugging.
If activated, alevin dumps all the features used by the CB classification and their counts at each cell level. It is generally used in combination with other command line flags.
""""""""""""
``--dumpCsvCounts``
``--dumpMtx``
""""""""""""
This flags is used to internally convert the default binary format of alevin for gene-count matrix into a human readable csv (comma separated) format. The expression of all the gene in one cell is written in one row, while columns represents the genes.
This flag is used to internally convert the default binary format of alevin's gene-count matrix into a human readable mtx (matrix market exchange) sparse format.
""""""""""""""""""""""
``--forceCells``
......@@ -157,6 +153,8 @@ Just like `forceCells` flag, it's yet another way of skipping the knee calculati
""""""""""""""""""""""
Alevin provides an estimate of the inferential uncertainty in the estimation of the per-cell-level gene count matrix by performing bootstrapping of the reads in per-cell-level equivalence classes. This command line flag informs Alevin to perform a certain number of bootstraps and generate the mean and variance of the count matrix. This option generates three additional files, namely, `quants_mean_mat.gz`, `quants_var_mat.gz` and `quants_boot_rows.txt`. The format of the files stays the same as `quants_mat.gz` while the row order is saved in `quants_boot_rows.txt` and the column order stays the same as in the file `quants_mat_cols.txt`.
.. note:: Alevin can also dump the full bootstrap cell-gene count matrix of an experiment. To generate inferential replicates of the experiment, `--numCellBootstraps` has to be paired with `--dumpFeatures`, which generates a file named `quants_boot_mat.gz`. The output format is the same as `quants_mat.gz` and we fit the 3D cube of the cell-inference-gene counts in 2D as follows: if an experiment has C cells, G genes and N inferential replicates, the alevin output file `quants_boot_mat.gz` would contain C*N rows and G columns while, starting from the top, the first N rows would represent the first cell and its N inferential replicates. For more information on importing and using inferential replicates for single-cell data in generating accurate differential expression analysis, check out `tximport <https://github.com/mikelove/tximport>`_ and our `Swish <https://www.biorxiv.org/content/10.1101/561084v2>`_ paper.
""""""""""""""""""""""
``--debug``
""""""""""""""""""""""
......@@ -192,7 +190,20 @@ A typical run of alevin will generate 4 files:
* *quants\_mat\_rows.txt* -- Row Index (CB-ids) of the matrix.
* *quants\_tier\_mat.gz* -- Tier categorization of the matrix.
Alevin can also dump the count-matrix in a human readable -- comma-separated-value (_CSV_) format, if given flag `--dumpCsvCounts` which generates a new output file called `quants_mat.csv`.
Along with the Cell-v-Gene count matrix, alevin dumps a 3-fold categorization of each estimated count value of a gene(each cell disjointly) in the form of tiers. Tier 1 is the set of genes where all the reads are uniquely mapping. Tier 2 is genes that have ambiguously mapping reads, but connected to unique read evidence as well, that can be used by the EM to resolve the multimapping reads. Tier 3 is the genes that have no unique evidence and the read counts are, therefore, distributed between these genes according to an uninformative prior.
Alevin can also dump the count-matrix in a human readable -- matrix-market-exchange (_mtx_) format, if given flag `--dumpMtx` which generates a new output file called `quants_mat.mtx`.
Output Quality Check
--------------------
The alevin-generated gene-count matrix can be visualized for various quality checks using `alevinQC <https://csoneson.github.io/alevinQC/>`_ , a Shiny-based R package that is actively supported by `Charlotte Soneson <https://csoneson.github.io/>`_.
Tutorial & Parsers
------------------
We have compiled a step-by-step resource to help get started with alevin. We have tutorials on how to get input, run and generate output using alevin's framework which can be found here at `Alevin Tutorials <https://combine-lab.github.io/alevin-tutorial/#blog>`_.
The tutorial also covers the topic of integrating alevin with downstream analysis tools like Seurat and Monocle. If you are interested in parsing various output binary formats like `quants_mat.gz`, `quants_tier_mat.gz`, `cell_umigraph.gz` etc. of alevin in python, checkout our companion repo for `python parsing <https://github.com/k3yavi/vpolo/blob/master/vpolo/alevin/parser.py>`_. This repo is also available on pip and can be installed through `pip install vpolo`. We cover how to use this library on our alevin-tutorial website too.
Misc
----
......@@ -208,9 +219,27 @@ suggestions --- please contact us (`asrivastava@cs.stonybrook.edu` and/or
.. The paper describing this method is published in BioArxiv XXXX. (update this when it appears)
Citation
--------
| @article{srivastava2019alevin,
| title={Alevin efficiently estimates accurate gene abundances from dscRNA-seq data},
| author={Srivastava, Avi and Malik, Laraib and Smith, Tom and Sudbery, Ian and Patro, Rob},
| journal={Genome biology},
| volume={20},
| number={1},
| pages={65},
| year={2019},
| publisher={BioMed Central}
| }
References
----------
.. [#swish] Zhu, Anqi, et al. "Nonparametric expression analysis using inferential replicate counts." BioRxiv (2019): 561084.
.. [#monocle] Qiu, Xiaojie, et al. "Reversed graph embedding resolves complex single-cell trajectories." Nature methods 14.10 (2017): 979.
.. [#seurat] Butler, Andrew, et al. "Integrating single-cell transcriptomic data across different conditions, technologies, and species." Nature biotechnology 36.5 (2018): 411.
.. [#dropseq] Macosko, Evan Z., et al. "Highly parallel genome-wide expression profiling of individual cells using nanoliter droplets." Cell 161.5 (2015): 1202-1214.
......
......@@ -55,9 +55,9 @@ copyright = u'2013-2017, Rob Patro, Geet Duggal, Mike Love, Rafael Irizarry and
# built documents.
#
# The short X.Y version.
version = '0.12'
version = '1.0'
# The full version, including alpha/beta/rc tags.
release = '0.12.0'
release = '1.0.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......
......@@ -52,7 +52,7 @@ So, for example, if you wanted to specify a fragment library of strand-specific
paired-end reads, oriented toward each other, where read 1 comes from the
forward strand and read 2 comes from the reverse strand, you would specify ``-l
ISF`` on the command line. This designates that the library being processed has
the type "ISF" meaning, **I**\ nward (the relative orientation), **S**\ tranted
the type "ISF" meaning, **I**\ nward (the relative orientation), **S**\ tranded
(the protocol is strand-specific), **F**\ orward (read 1 comes from the forward
strand).
......
This diff is collapsed.