Skip to content
Commits on Source (2)
Stacks 2.0 Beta 9 - Mar 12, 2018
--------------------------------
Feature: Cleaned up tags/snps/alleles/matches files. We removed the batch ID from ustacks and cstacks
output, and the deprecated log likelihood fields from ustacks and cstacks. We also removed
the chromosome/bp/strand fields as they are no longer used in these files.
Feature: Renamed gstacks output files that represent the new components of the catalog:
gstacks.fa.gz => catalog.fa.gz; gstacks.vcf.gz => catalog.calls
Feature: Removed read length restrictions from ustacks/cstacks/sstacks core, reads/loci can vary in
length throughout the pipeline.
Feature: Reimplemented PLINK export format for the populations program.
Bugfix: Updated to HTSLib 1.7; changed to a custom build system that will work with the Stacks build
system.
Bugfix: Made gapped alignments mandatory in ustacks, cstacks, and sstacks. Added check for frameshift
at 3' end of the read -- if found, a match is deferred to the gapped aligner.
Stacks 2.0 Beta 8 - Feb 03, 2018
--------------------------------
Feature: populations: Now calculates deviation from Hardy-Weinberg equilibrium at the SNP level
......
SUBDIRS = htslib
HTSLIB = htslib/libhts.a
AM_CPPFLAGS = $(OPENMP_CFLAGS) -I$(top_srcdir)/htslib
AM_LDFLAGS = $(OPENMP_CFLAGS)
LDADD = $(HTSLIB) libcore.a
noinst_LIBRARIES = libcore.a libclean.a libpop.a
bin_PROGRAMS = ustacks cstacks sstacks process_radtags process_shortreads \
kmer_filter clone_filter populations phasedstacks \
tsv2bam gstacks
noinst_LIBRARIES = libcore.a libclean.a libpop.a
libcore_a_SOURCES = \
src/aln_utils.h src/aln_utils.cc \
src/BamI.h src/BamI.cc \
......@@ -38,7 +44,8 @@ libcore_a_SOURCES = \
src/stacks.h src/stacks.cc \
src/Tsv.h \
src/utils.h src/utils.cc \
src/Vcf.h src/Vcf.cc
src/Vcf.h src/Vcf.cc \
$(HTSLIB)
libclean_a_SOURCES = \
src/clean.h src/clean.cc \
......@@ -56,9 +63,6 @@ libpop_a_SOURCES = \
src/smoothing_utils.h \
src/Hwp.h src/Hwp.cc
libcore_a_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
libpop_a_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
phasedstacks_SOURCES = libcore.a src/phasedstacks.h src/phasedstacks.cc
process_radtags_SOURCES = libcore.a libclean.a src/process_radtags.h src/process_radtags.cc
......@@ -69,55 +73,17 @@ clone_filter_SOURCES = libcore.a libclean.a src/clone_filter.h src/clone_f
ustacks_SOURCES = libcore.a src/ustacks.h src/ustacks.cc
cstacks_SOURCES = libcore.a src/cstacks.h src/cstacks.cc
sstacks_SOURCES = libcore.a src/sstacks.h src/sstacks.cc
populations_SOURCES = libcore.a libpop.a src/populations.h src/populations.cc
# tsv2bam & gstacks
tsv2bam_SOURCES = libcore.a src/tsv2bam.cc
gstacks_SOURCES = libcore.a src/gstacks.h src/gstacks.cc src/debruijn.h src/debruijn.cc src/Alignment.h src/SuffixTree.h src/SuffixTree.cc
populations_SOURCES = libcore.a libpop.a src/populations.h src/populations.cc
process_radtags_LDADD = $(LDADD) libclean.a
process_shortreads_LDADD = $(LDADD) libclean.a
clone_filter_LDADD = $(LDADD) libclean.a
kmer_filter_LDADD = $(LDADD) libclean.a
populations_LDADD = $(LDADD) libpop.a
phasedstacks_CXXFLAGS = $(OPENMP_CFLAGS)
phasedstacks_LDFLAGS = $(OPENMP_CFLAGS)
phasedstacks_LDADD = libcore.a $(BAM_LIBS)
ustacks_CXXFLAGS = $(OPENMP_CFLAGS)
ustacks_LDFLAGS = $(OPENMP_CFLAGS)
ustacks_LDADD = libcore.a $(BAM_LIBS)
cstacks_CXXFLAGS = $(OPENMP_CFLAGS)
cstacks_LDFLAGS = $(OPENMP_CFLAGS)
cstacks_LDADD = libcore.a $(BAM_LIBS)
sstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
sstacks_LDFLAGS = $(OPENMP_CFLAGS)
sstacks_LDADD = libcore.a $(BAM_LIBS)
process_radtags_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
process_radtags_LDFLAGS = $(OPENMP_CFLAGS)
process_radtags_LDADD = libcore.a libclean.a $(BAM_LIBS)
process_shortreads_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
process_shortreads_LDFLAGS = $(OPENMP_CFLAGS)
process_shortreads_LDADD = libcore.a libclean.a $(BAM_LIBS)
clone_filter_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
clone_filter_LDFLAGS = $(OPENMP_CFLAGS)
clone_filter_LDADD = libcore.a libclean.a $(BAM_LIBS)
kmer_filter_CXXFLAGS = $(OPENMP_CFLAGS)
kmer_filter_LDFLAGS = $(OPENMP_CFLAGS)
kmer_filter_LDADD = libcore.a libclean.a $(BAM_LIBS)
populations_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
populations_LDFLAGS = $(OPENMP_CFLAGS)
populations_LDADD = libcore.a libpop.a $(BAM_LIBS)
# tsv2bam & gstacks
tsv2bam_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
tsv2bam_LDFLAGS = $(OPENMP_CFLAGS)
tsv2bam_LDADD = libcore.a $(BAM_LIBS)
gstacks_CXXFLAGS = $(OPENMP_CFLAGS) $(BAM_CFLAGS)
gstacks_LDFLAGS = $(OPENMP_CFLAGS)
gstacks_LDADD = libcore.a $(BAM_LIBS)
dist_bin_SCRIPTS = scripts/denovo_map.pl scripts/ref_map.pl scripts/export_sql.pl \
scripts/sort_read_pairs.pl scripts/exec_velvet.pl scripts/load_sequences.pl \
scripts/index_radtags.pl scripts/load_radtags.pl scripts/stacks_export_notify.pl \
dist_bin_SCRIPTS = scripts/denovo_map.pl scripts/ref_map.pl \
scripts/integrate_alignments.py scripts/count_fixed_catalog_snps.py \
scripts/stacks-integrate-alignments scripts/stacks-dist-extract scripts/stacks-gdb
......@@ -142,8 +108,6 @@ EXTRA_DIST = $(nobase_pkgdata_DATA) LICENSE INSTALL README ChangeLog $(TESTS)
pkglocalstatedir = $(localstatedir)/$(PACKAGE)
SUBDIRS = htslib .
debug:
$(MAKE) all "CXXFLAGS=-g -Wall -DDEBUG -std=gnu++11"
......@@ -154,28 +118,6 @@ install-data-hook:
sed -e 's,_VERSION_,$(VERSION),' -e 's,_BINDIR_,$(bindir)/,g' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/ref_map.pl > $(DESTDIR)$(bindir)/ref_map.pl.subst
mv $(DESTDIR)$(bindir)/ref_map.pl.subst $(DESTDIR)$(bindir)/ref_map.pl
chmod +x $(DESTDIR)$(bindir)/ref_map.pl
sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/export_sql.pl > $(DESTDIR)$(bindir)/export_sql.pl.subst
mv $(DESTDIR)$(bindir)/export_sql.pl.subst $(DESTDIR)$(bindir)/export_sql.pl
chmod +x $(DESTDIR)$(bindir)/export_sql.pl
sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/index_radtags.pl > $(DESTDIR)$(bindir)/index_radtags.pl.subst
mv $(DESTDIR)$(bindir)/index_radtags.pl.subst $(DESTDIR)$(bindir)/index_radtags.pl
chmod +x $(DESTDIR)$(bindir)/index_radtags.pl
sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/load_radtags.pl > $(DESTDIR)$(bindir)/load_radtags.pl.subst
mv $(DESTDIR)$(bindir)/load_radtags.pl.subst $(DESTDIR)$(bindir)/load_radtags.pl
chmod +x $(DESTDIR)$(bindir)/load_radtags.pl
sed -e 's,_VERSION_,$(VERSION),' $(DESTDIR)$(bindir)/sort_read_pairs.pl > $(DESTDIR)$(bindir)/sort_read_pairs.pl.subst
mv $(DESTDIR)$(bindir)/sort_read_pairs.pl.subst $(DESTDIR)$(bindir)/sort_read_pairs.pl
chmod +x $(DESTDIR)$(bindir)/sort_read_pairs.pl
sed -e 's,_VERSION_,$(VERSION),' $(DESTDIR)$(bindir)/exec_velvet.pl > $(DESTDIR)$(bindir)/exec_velvet.pl.subst
mv $(DESTDIR)$(bindir)/exec_velvet.pl.subst $(DESTDIR)$(bindir)/exec_velvet.pl
chmod +x $(DESTDIR)$(bindir)/exec_velvet.pl
sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/load_sequences.pl > $(DESTDIR)$(bindir)/load_sequences.pl.subst
mv $(DESTDIR)$(bindir)/load_sequences.pl.subst $(DESTDIR)$(bindir)/load_sequences.pl
chmod +x $(DESTDIR)$(bindir)/load_sequences.pl
sed -e 's,_VERSION_,$(VERSION),' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' -e 's,_BINDIR_,$(bindir)/,g' \
$(DESTDIR)$(bindir)/stacks_export_notify.pl > $(DESTDIR)$(bindir)/stacks_export_notify.pl.subst
mv $(DESTDIR)$(bindir)/stacks_export_notify.pl.subst $(DESTDIR)$(bindir)/stacks_export_notify.pl
chmod +x $(DESTDIR)$(bindir)/stacks_export_notify.pl
sed -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' -e 's,_BINDIR_,$(bindir)/,g' \
$(DESTDIR)$(pkgdatadir)/php/constants.php.dist > $(DESTDIR)$(pkgdatadir)/php/constants.php.dist.subst
mv $(DESTDIR)$(pkgdatadir)/php/constants.php.dist.subst $(DESTDIR)$(pkgdatadir)/php/constants.php.dist
......
This diff is collapsed.
......@@ -7,30 +7,10 @@ Stacks should build on any standard UNIX-like environment (Apple OS X, Linux,
etc.) Stacks is an independent pipeline and can be run without any additional
external software.
Note: Apple OS X does not use the GNU Compiler Collection, which is
standard on Linux-based systems. Instead, Apple distributes CLANG,
which is a nice compiler but does not yet support the OpenMP library
which Stacks relies on for parallel processing. Stacks can still be built
and run on an Apple system, however, you will have to disable building
with OpenMP (supply the --disable-openmp flag to configure) and use
non-parallelized code. If you want to install a parallelized version of
Stacks, you can install GCC by hand, or using a package system such as
Homebrew (http://brew.sh/) or MacPorts (http://www.macports.org/).
If compiling with the standard GCC compiler, Stacks needs a version of the compiler
newer than GCC 4.9.0.
1. Install optional components for wrapper programs:
Several Perl scripts are distributed with Stacks to run the pipeline components and
upload pipeline output to the MySQL database server. For these to work, you must have
the Perl DBI module installed with the MySQL driver. Most Linux distributions will
include the Perl DBI module, but if not, Perl modules are easily installed with the
cpan tool.
% sudo apt-get install libdbd-mysql-perl
A similar set of commands can be executed on Debian using apt-get, or on a RedHat derived Linux
system using yum, or another package manager on other Linux distributions.
2. Build the software. Stacks uses the standard autotools install:
1. Build the software. Stacks uses the standard autotools install:
% tar xfvz stacks-x.xx.tar.gz
% cd stacks-x.xx
......@@ -60,138 +40,4 @@ A default Stacks install will install files in the following way:
/usr/local/bin - stacks executables and perl scripts
/usr/local/share/stacks - PHP files for the web interface
and SQL files for creating the
MySQL database
The pipeline is now ready to run. The remaining install instructions are to get the
web interface up and running. The web interface is very useful for visualization and
more or less required for building genetic maps. However, Stacks does not depend on
the web interface to run.
The Stacks Web Interface
------------------------
To visualize data, Stacks uses a web-based interface (written in PHP) that interacts
with a MySQL database server. MySQL provides various functions to store, sort, and
export data from a database.
0. Prerequisites
Most server installations will provide Apache, MySQL, Perl, and PHP by default. If you want
to export data in Microsoft Excel Spreadsheets, you will need the
Spreadsheet::WriteExcel Perl module. While installing these components is beyond these
instructions, here are some links that might be useful:
1. MySQL Database: http://dev.mysql.com/downloads/mysql/
2. Spreadsheet Perl Module: http://search.cpan.org/~jmcnamara/Spreadsheet-WriteExcel-2.40/
If you are running a version of Linux, the above software can be installed via the
package manager. If you are using Ubuntu, you can install the following packages:
% sudo apt-get install mysql-server mysql-client
% sudo apt-get install php5 php5-mysqlnd
% sudo apt-get install libspreadsheet-writeexcel-perl
A similar set of commands can be executed on Debian using apt-get, or on a RedHat derived Linux
system using yum, or another package manager on other Linux distributions.
1. Edit the MySQL configuration file, installed in
/usr/local/share/stacks/sql/mysql.cnf.dist, to enable access to the
database from the Stacks scripts.
% cd /usr/local/share/stacks/sql/
% cp mysql.cnf.dist mysql.cnf
Edit the file to reflect the proper username, password, and host to
use to access MySQL.
The various scripts that access the database will search for a MySQL
configuration file in your home directory before using the
Stacks-distributed copy. If you already have a personal account set up
and configured (in ~/.my.cnf) you can continue to use these
credentials instead of setting up new, common ones.
If you just installed MySQL and have not added any users, you can do so with these
commands:
% mysql
mysql> GRANT ALL ON *.* TO 'stacks_user'@'localhost' IDENTIFIED BY 'stackspassword';
Edit /usr/local/share/stacks/sql/mysql.cnf to contain the username and password
you specified to MySQL.
(This information was taken from: http://dev.mysql.com/doc/refman/5.1/en/grant.html)
2. Enable the Stacks web interface in the Apache webserver.
Add the following lines to your Apache configuration to make the Stacks PHP
files visible to the web server and to provide an easily readable URL to access
them:
<Directory "/usr/local/share/stacks/php">
Order deny,allow
Deny from all
Allow from all
Require all granted
</Directory>
Alias /stacks "/usr/local/share/stacks/php"
A sensible way to do this is to create the file stacks.conf with the above lines.
If you are using Apache 2.3 or earlier:
---------------------------------------
Place the stacks.conf file in either
/etc/apache2/conf.d/ or /etc/httpd/conf.d/
directory (depending on your Linux distro) and
restart the apache server:
# vi /etc/apache2/conf.d/stacks.conf
# apachectl restart
(See the Apache configuration for more information on what these do:
http://httpd.apache.org/docs/2.0/mod/core.html#directory)
If you are using Apache 2.4 or later:
---------------------------------------
Place the stacks.conf file in
/etc/apache2/conf-available
directory and then create a symlink to it in the
/etc/apache2/conf-enabled
directory. Then restart Apache. Like so:
# vi /etc/apache2/conf-available/stacks.conf
# ln -s /etc/apache2/conf-available/stacks.conf /etc/apache2/conf-enabled/stacks.conf
# apachectl restart
3. Provide access to the MySQL database from the web interface
Edit the PHP configuration file (constants.php.dist) to allow it access to the
MySQL database. Change the file to include the proper database
username ($db_user), password ($db_pass), and hostname ($db_host). Rename the
distribution file so it is active.
% cp /usr/local/share/stacks/php/constants.php.dist /usr/local/share/stacks/php/constants.php
% vi /usr/local/share/stacks/php/constants.php
You may find it advantageous to create a specific MySQL user with limited
permissions - SELECT, UPDATE, and DELETE to allow users to interact
with the database through the web interface.
4. Enable web-based exporting from the MySQL database.
Edit the stacks_export_notify.pl script to specify the email and SMTP
server to use in notification messages.
Ensure that the permissions of the php/export directory allow the
webserver to write to it. Assuming your web server user is 'www':
% chown www /usr/local/share/stacks/php/export
The pipeline is now ready to run.
......@@ -3,9 +3,6 @@
/* Define to 1 if the `closedir' function returns void instead of `int'. */
#undef CLOSEDIR_VOID
/* Enable compilation with Samtools BAM library */
#undef HAVE_BAM
/* Define to 1 if you have the `clock_gettime' function. */
#undef HAVE_CLOCK_GETTIME
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.63 for Stacks 2.0Beta8c.
# Generated by GNU Autoconf 2.63 for Stacks 2.0Beta9.
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
......@@ -594,11 +594,12 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='Stacks'
PACKAGE_TARNAME='stacks'
PACKAGE_VERSION='2.0Beta8c'
PACKAGE_STRING='Stacks 2.0Beta8c'
PACKAGE_VERSION='2.0Beta9'
PACKAGE_STRING='Stacks 2.0Beta9'
PACKAGE_BUGREPORT=''
ac_unique_file="src/ustacks.cc"
ac_unique_file="htslib/hts.c"
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
......@@ -667,8 +668,6 @@ CPPFLAGS
LDFLAGS
CXXFLAGS
CXX
BAM_LIBS
BAM_CFLAGS
am__untar
am__tar
AMTAR
......@@ -732,7 +731,6 @@ SHELL'
ac_subst_files=''
ac_user_opts='
enable_option_checking
enable_bam
enable_dependency_tracking
enable_openmp
'
......@@ -1300,7 +1298,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures Stacks 2.0Beta8c to adapt to many kinds of systems.
\`configure' configures Stacks 2.0Beta9 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1366,7 +1364,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of Stacks 2.0Beta8c:";;
short | recursive ) echo "Configuration of Stacks 2.0Beta9:";;
esac
cat <<\_ACEOF
......@@ -1374,7 +1372,6 @@ Optional Features:
--disable-option-checking ignore unrecognized --enable/--with options
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--disable-bam Disable use of BAM files through HTSLib.
--disable-dependency-tracking speeds up one-time build
--enable-dependency-tracking do not reject slow dependency extractors
--disable-openmp do not use OpenMP
......@@ -1456,7 +1453,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
Stacks configure 2.0Beta8c
Stacks configure 2.0Beta9
generated by GNU Autoconf 2.63
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
......@@ -1470,7 +1467,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by Stacks $as_me 2.0Beta8c, which was
It was created by Stacks $as_me 2.0Beta9, which was
generated by GNU Autoconf 2.63. Invocation command line was
$ $0 $@
......@@ -2319,7 +2316,7 @@ fi
# Define the identity of the package.
PACKAGE='stacks'
VERSION='2.0Beta8c'
VERSION='2.0Beta9'
cat >>confdefs.h <<_ACEOF
......@@ -2360,6 +2357,7 @@ am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
ac_config_headers="$ac_config_headers config.h"
......@@ -2367,28 +2365,15 @@ ac_config_headers="$ac_config_headers config.h"
#
# Get BAM, aka HTSLib library and include locations if requested
#
# Check whether --enable-bam was given.
if test "${enable_bam+set}" = set; then
enableval=$enable_bam;
fi
if test "x$enable_bam" != "xno"; then
cat >>confdefs.h <<\_ACEOF
#define HAVE_BAM 1
_ACEOF
BAM_CFLAGS='-I./htslib/htslib'
BAM_LIBS='./htslib/libhts.a'
fi
#AC_ARG_ENABLE([bam],
# AS_HELP_STRING([--disable-bam], [Disable use of BAM files through HTSLib.]))
#AS_IF([test "x$enable_bam" != "xno"], [
#AC_DEFINE([HAVE_BAM], [1], [Enable compilation with Samtools BAM library])
#BAM_CFLAGS='-I./htslib/htslib'
#AC_SUBST([BAM_CFLAGS])
#BAM_LIBS='./htslib/libhts.a'
#AC_SUBST([BAM_LIBS])
#])
# Checks for programs.
ac_ext=cpp
......@@ -8166,7 +8151,7 @@ fi
done
ac_config_files="$ac_config_files Makefile"
ac_config_files="$ac_config_files Makefile htslib/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
......@@ -8616,7 +8601,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by Stacks $as_me 2.0Beta8c, which was
This file was extended by Stacks $as_me 2.0Beta9, which was
generated by GNU Autoconf 2.63. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -8679,7 +8664,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
Stacks config.status 2.0Beta8c
Stacks config.status 2.0Beta9
configured by $0, generated by GNU Autoconf 2.63,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
......@@ -8805,6 +8790,7 @@ do
"config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
"depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"htslib/Makefile") CONFIG_FILES="$CONFIG_FILES htslib/Makefile" ;;
*) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
$as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
......
......@@ -2,28 +2,26 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
AC_INIT([Stacks], [2.0Beta8c])
AC_INIT([Stacks], [2.0Beta9])
AC_CONFIG_AUX_DIR([config])
AM_INIT_AUTOMAKE([-Wall -Werror foreign parallel-tests subdir-objects])
AC_CONFIG_SRCDIR([src/ustacks.cc])
AC_CONFIG_SRCDIR([htslib/hts.c])
AC_CONFIG_HEADERS([config.h])
m4_pattern_allow([AC_OPENMP])
#
# Get BAM, aka HTSLib library and include locations if requested
#
AC_ARG_ENABLE([bam],
AS_HELP_STRING([--disable-bam], [Disable use of BAM files through HTSLib.]))
AS_IF([test "x$enable_bam" != "xno"], [
AC_DEFINE([HAVE_BAM], [1], [Enable compilation with Samtools BAM library])
BAM_CFLAGS='-I./htslib/htslib'
AC_SUBST([BAM_CFLAGS])
BAM_LIBS='./htslib/libhts.a'
AC_SUBST([BAM_LIBS])
])
#AC_ARG_ENABLE([bam],
# AS_HELP_STRING([--disable-bam], [Disable use of BAM files through HTSLib.]))
#AS_IF([test "x$enable_bam" != "xno"], [
#AC_DEFINE([HAVE_BAM], [1], [Enable compilation with Samtools BAM library])
#BAM_CFLAGS='-I./htslib/htslib'
#AC_SUBST([BAM_CFLAGS])
#BAM_LIBS='./htslib/libhts.a'
#AC_SUBST([BAM_LIBS])
#])
# Checks for programs.
AC_PROG_CXX
......@@ -99,5 +97,5 @@ AC_REGEX_FUNC
# For test harness
AC_PROG_AWK
AC_CONFIG_FILES([Makefile])
AC_CONFIG_FILES([Makefile htslib/Makefile])
AC_OUTPUT
This diff is collapsed.
#!/usr/bin/env perl
#
# Copyright 2011, Julian Catchen <jcatchen@uoregon.edu>
#
# This file is part of Stacks.
#
# Stacks is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Stacks is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Stacks. If not, see <http://www.gnu.org/licenses/>.
#
use strict;
use constant stacks_version => "_VERSION_";
my $debug = 0;
my $paired = 0;
my $amos = 0;
my $ins_len_dist = 0;
my $single_path = "";
my $paired_path = "";
my $sanger_path = "";
my $out_path = ".";
my $white_list = "";
my $hash_len = 27;
my $insert_len = 0;
my $exp_cov = 0;
my $cov_cut = 0.0;
my $min_len = 0;
my $read_trk = 0;
my $clean = 1;
my $collate = 0;
my $exe_path = "";
my $velveth = "velveth";
my $velvetg = "velvetg";
parse_command_line();
if (length($exe_path) > 0) {
$velveth = $exe_path . "/" . $velveth;
$velvetg = $exe_path . "/" . $velvetg;
}
#
# Test that we can execute the velvet programs
#
die ("Unable to find '" . $velveth . "'.\n") if (!-e $velveth || !-x $velveth);
die ("Unable to find '" . $velvetg . "'.\n") if (!-e $velvetg || !-x $velvetg);
my (@locus_files, $num_files, $file, $input_file, $output_file, $hres_file, $gres_file, $collate_fh);
build_file_list(\@locus_files);
$num_files = scalar(@locus_files);
if ($collate) {
open($collate_fh, ">$out_path/collated.fa") or die("Unable to open collate file, $!\n");
}
my ($sing_data, $pair_data, $sang_data, $ins, $cov, $afg, $cut, $min, $read, $cln);

# Build the velvet command-line option strings from the user's settings.
$ins  = $paired   > 0 ? "-ins_length $insert_len" : "-ins_length auto";
$cov  = $paired   > 0 ? "-exp_cov $exp_cov"       : "-exp_cov auto";
$cut  = $cov_cut  > 0 ? "-cov_cutoff $cov_cut"    : "-cov_cutoff auto";
$read = $read_trk > 0 ? "-read_trkg yes"          : "";
$cln  = $clean    > 0 ? "-very_clean yes"         : "";
# The minimum-contig-length filter is applied in collate_and_clean() rather
# than passed to velvetg; keep $min defined (it was previously left undef)
# so the interpolations below and in the velvetg command are well-defined.
$min  = "";
#$min = $min_len > 0 ? "-min_contig_lgth $min_len" : ""

#
# Write out the parameters for this assembly
#
open(PARAM, "> $out_path/velvet_parameters.txt") or die("Unable to open parameter file: $!\n");
print PARAM
    "Single-end Path: ", $single_path, "\n",
    "Paired-end Path: ", $paired_path, "\n",
    "Sanger Path: ", $sanger_path, "\n",
    "Hash Length: ", $hash_len, "\n",
    "Insert Length: ", $ins, "\n",
    "Coverage: ", $cov, "\n",
    "Coverage Cutoff: ", $cut, "\n",
    "Minimum contig length: ", $min, "\n",  # typo fix: was "Miniumum"
    "Read tracking: ", $read, "\n",
    "Very Clean: ", $cln, "\n";
close(PARAM);
# Run velveth + velvetg once per locus file, optionally collating results.
my $i = 1;
foreach $file (@locus_files) {
# Reduce the file name to the bare locus name (drop the .fa suffix).
($file) = ($file =~ /(.+)\.fa/);
$output_file = $out_path . "/" . $file;
# Per-locus log files capturing velveth (-h) and velvetg (-g) output.
$hres_file = $out_path . "/" . $file . "-h.output";
$gres_file = $out_path . "/" . $file . "-g.output";
# Only pass an input flag for each read type whose path was supplied.
$sing_data = length($single_path) > 0 ? '-short ' . $single_path . "/" . $file . ".fa" : "";
$pair_data = length($paired_path) > 0 ? '-shortPaired ' . $paired_path . "/" . $file . ".fa" : "";
$sang_data = length($sanger_path) > 0 ? '-long ' . $sanger_path . "/" . $file . ".fa" : "";
print STDERR "Assembling locus '$file'; run $i of $num_files \r";
# Execute velveth to build hash table, then velvetg to assemble
print STDERR "$velveth $output_file $hash_len -fasta $sing_data $pair_data $sang_data &> $hres_file\n" if ($debug);
`$velveth $output_file $hash_len -fasta $sing_data $pair_data $sang_data &> $hres_file`;
print STDERR "$velvetg $output_file $ins $cov $cut $min $read $cln &> $gres_file\n" if ($debug);
`$velvetg $output_file $ins $cov $cut $min $read $cln &> $gres_file`;
# Merge this locus's contigs into the collated file and remove work files.
collate_and_clean($out_path, $file, $collate_fh) if ($collate);
$i++;
}
close($collate_fh) if ($collate);
sub collate_and_clean {
    # Append the contigs from one finished Velvet run to the collated FASTA
    # file, then delete the per-locus log files and working directory.
    my ($out_path, $file, $collate_fh) = @_;

    my @seqs;
    parse_fasta("$out_path/$file/contigs.fa", \@seqs);

    for my $seq (@seqs) {
        # Drop contigs shorter than the user-specified minimum length.
        next if (length($seq->{'seq'}) < $min_len);

        # Prefix the contig ID with the locus name so collated IDs are unique.
        $seq->{'id'} = $file . "|" . $seq->{'id'};
        print_fasta($collate_fh, $seq);
    }

    `rm $out_path/$file-g.output`;
    `rm $out_path/$file-h.output`;
    `rm -r $out_path/$file`;
}
sub parse_fasta {
    # Read a FASTA file into @$seqs as a list of { id, seq } hash records.
    # A record is emitted each time a new header closes the previous one;
    # the final record is emitted after the file ends.
    my ($file, $seqs) = @_;

    open(my $in, "<", $file)
        or die("Unable to open Velvet output file: $file, $!\n");

    my $cur_id  = "";
    my $cur_seq = "";

    foreach my $line (<$in>) {
        chomp $line;

        if ($line =~ /^>/) {
            # New header: flush whatever sequence has accumulated.
            if (length($cur_seq) > 0) {
                push(@{$seqs}, { 'id' => $cur_id, 'seq' => $cur_seq });
                $cur_seq = "";
            }
            $cur_id = substr($line, 1);
        } else {
            $cur_seq .= $line;
        }
    }

    # Flush the last record, if the file ended with sequence data.
    push(@{$seqs}, { 'id' => $cur_id, 'seq' => $cur_seq })
        if (length($cur_seq) > 0 && length($cur_id) > 0);

    close($in);
}
sub print_fasta {
    # Emit a single { id, seq } record to $fh in FASTA format, wrapping the
    # sequence at 60 characters per line.
    my ($fh, $seq) = @_;

    print $fh ">", $seq->{'id'}, "\n";

    # Split the sequence into chunks of at most 60 characters, one per line.
    foreach my $chunk ($seq->{'seq'} =~ /(.{1,60})/sg) {
        print $fh $chunk, "\n";
    }
}
sub build_file_list {
    # Populate @$files with the locus FASTA file names found in the
    # paired-end path (preferred) or, failing that, the single-end path,
    # optionally restricted to the entries of the white list.
    my ($files) = @_;
    my (@ls, $line, $file, $path);

    # Load a white list of files to process if it is supplied.
    my @wl;
    if (length($white_list) > 0) {
        load_white_list(\@wl);
    }

    $path = length($paired_path) > 0 ? $paired_path : $single_path;

    # Quote the path so directories containing spaces do not break the shell.
    @ls = `ls -1 "$path/"`;

    foreach $line (@ls) {
        chomp $line;

        next if (length($line) == 0);
        # Only consider .fa / .fasta files.
        next if ($line !~ /.+\.fa$/ && $line !~ /.+\.fasta$/);

        ($file) = ($line =~ /^(.+\.fas?t?a?)/);

        if (scalar(@wl) > 0) {
            # Bugfix: compare by string equality. The previous
            # grep(/^$file$/, @wl) interpreted the file name as a regex,
            # which misbehaved for names containing metacharacters (e.g. '+').
            next if (!grep { $_ eq $file } @wl);
        }

        push(@{$files}, $file);
    }
}
sub load_white_list {
    # Read the white-list file (one file name per line) into @$wl,
    # skipping blank lines.
    my ($wl) = @_;

    open(my $in, "<", $white_list)
        or die("Unable to open white list file '$white_list': $!\n");

    while (my $entry = <$in>) {
        chomp $entry;
        next if (length($entry) == 0);
        push(@{$wl}, $entry);
    }

    close($in);
}
sub parse_command_line {
    # Interpret @ARGV and fill in the file-scoped option variables.
    # -v prints the version and exits; -h or any unrecognized option prints
    # the usage text and exits.

    # Options that consume a following value, mapped to their target vars.
    my %opt_var = (
        '-s' => \$single_path,
        '-p' => \$paired_path,
        '-l' => \$sanger_path,
        '-o' => \$out_path,
        '-W' => \$white_list,
        '-I' => \$insert_len,
        '-C' => \$exp_cov,
        '-T' => \$cov_cut,
        '-R' => \$read_trk,
        '-M' => \$min_len,
        '-H' => \$hash_len,
        '-e' => \$exe_path,
    );

    while (@ARGV) {
        my $arg = shift @ARGV;

        if (exists $opt_var{$arg}) {
            ${$opt_var{$arg}} = shift @ARGV;
        } elsif ($arg eq "-c") {
            $collate++;
        } elsif ($arg eq "-P") {
            $paired++;
        } elsif ($arg eq "-L") {
            $clean = 0;
        } elsif ($arg eq "-v") {
            version();
            exit();
        } elsif ($arg eq "-d") {
            $debug++;
        } elsif ($arg eq "-h") {
            usage();
        } else {
            print STDERR "Unknown command line option '$arg'\n";
            usage();
        }
    }

    # Drop a single trailing slash from each user-supplied path.
    foreach my $ref (\$single_path, \$paired_path, \$out_path, \$exe_path) {
        ${$ref} =~ s{/\z}{};
    }
}
sub version {
    # Report this script's name and the Stacks release string to STDERR.
    printf STDERR "exec_velvet.pl %s\n", stacks_version;
}
sub usage {
# Print the version banner and the option summary to STDERR, then exit 0.
# NOTE(review): the synopsis lists '-w marker_white_list', but
# parse_command_line() has no '-w' branch -- confirm whether the option
# was removed or the help text is stale.
version();
print STDERR <<EOQ;
exec_velvet.pl -p path -s path [-l path] -o path [-c] [-H len] [-P] [-I len] [-C exp_cov] [-T cov_cut]
[-W file_white_list] [-w marker_white_list] [-L] [-e path] [-d] [-h]
p: path to the paired-end FASTA files to assemble.
s: path to the single-end FASTA files to assemble.
l: path to long, sanger-style reads to assemble.
o: path to output the assembled files.
c: collate the resulting velvet runs into a single FASTA file, clean velvet directories.
W: a white list of files to process in the input path.
H: length of overlap required for reads (hash length, default 27).
P: process paired-end reads.
I: insert length (for paired-end reads; see velvet documentation).
C: expected coverage (for paired-end reads).
T: coverage cutoff.
M: minimum contig length, discard contigs shorter than this value.
R: turn on velvet's read tracking (uses additional memory).
L: leave velvet's intermediate files behind.
e: executable path, location of velvet programs.
h: display this help message.
d: turn on debug output.
EOQ
exit(0);
}
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env perl
#
# Copyright 2011-2014, Julian Catchen <jcatchen@uoregon.edu>
#
# This file is part of Stacks.
#
# Stacks is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Stacks is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Stacks. If not, see <http://www.gnu.org/licenses/>.
#
#
# Load a set of output files from the Stacks pipeline into a Stacks MySQL database.
#
# By Julian Catchen <jcatchen@uoregon.edu>
#
use strict;
use POSIX;
use File::Temp qw/ mktemp /;
use File::Spec;
use constant stacks_version => "_VERSION_";
my $mysql_config = "_PKGDATADIR_" . "sql/mysql.cnf";
my $dry_run = 0;
my $db = "";
my $in_path = ".";
my $sample_id = 0;
my $desc = "";
my $date = "";
my $batch_id = 0;
my $batch = 0;
my $catalog = 0;
my $stacks_type = "";
my $popmap_path = "";
my $ignore_tags = 0;
my $white_list = "";
parse_command_line();
my $cnf = (-e $ENV{"HOME"} . "/.my.cnf") ? $ENV{"HOME"} . "/.my.cnf" : $mysql_config;
if (length($date) == 0) {
$date = strftime("%Y-%m-%d", (localtime(time)));
}
my (@results, @files, @catalog, @parent_ids, @pop_ids, %pops, %sample_ids);
build_file_list(\@files, \@catalog);
extract_parental_ids(scalar(@files), \@catalog, \@parent_ids);
extract_sample_ids(\@files, \%sample_ids);
parse_population_map(\@files, \%sample_ids, \@pop_ids, \%pops);
#
# Summarize what was discovered before loading anything.
#
print STDERR
"Stacks pipeline type: '", $stacks_type, "'\n",
scalar(@files), " files to process: ", join(", ", @files), "\n",
scalar(@catalog), " catalog files to process: ", join(", ", @catalog), "\n";
if ($stacks_type eq "map") {
print STDERR
scalar(@parent_ids), " parent IDs identified: ", join(", ", @parent_ids), "\n";
}
#
# Record this batch in the batches table (-B). The command is echoed to STDERR
# whether or not it was executed, so a dry run shows what would happen.
# NOTE(review): $desc and $date are interpolated unescaped into the SQL; a
# description containing a quote will break the statement.
#
if ($batch) {
if (!$dry_run) {
@results = `mysql --defaults-file=$cnf $db -e "INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$stacks_type'"`;
}
print STDERR
"mysql --defaults-file=$cnf $db ",
"-e \"INSERT INTO batches SET id=$batch_id, description='$desc', date='$date', type='$stacks_type'\"\n",
@results;
}
# Loop/state variables shared by the import passes below.
my ($file, $f, $i, $cnt, $type, $pop_id);
#
# Import the catalog: for each catalog prefix load the tags, snps, and alleles
# files, preferring the uncompressed file and falling back to a gzipped copy
# (streamed through a named pipe by import_gzsql_file()). The final argument
# of 1 skips the single header line of each TSV file.
#
if ($catalog) {
foreach $file (@catalog) {
$f = $in_path . "/$file" . ".catalog.tags.tsv";
if (-e $f) {
import_sql_file($f, "catalog_tags", 1);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".catalog.tags.tsv.gz";
import_gzsql_file($f, "catalog_tags", 1);
}
$f = $in_path . "/$file" . ".catalog.snps.tsv";
if (-e $f) {
import_sql_file($f, "catalog_snps", 1);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".catalog.snps.tsv.gz";
import_gzsql_file($f, "catalog_snps", 1);
}
$f = $in_path . "/$file" . ".catalog.alleles.tsv";
if (-e $f) {
import_sql_file($f, "catalog_alleles", 1);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".catalog.alleles.tsv.gz";
import_gzsql_file($f, "catalog_alleles", 1);
}
}
}
#
# Load the batch-level output files. Map runs get markers and genotypes;
# population runs additionally get summary statistics and the pairwise
# Fst/Phi_st files.
#
if ($stacks_type eq "map") {
    $f = "$in_path/batch_" . $batch_id . ".markers.tsv";
    import_sql_file($f, "markers", 1);

    $f = "$in_path/batch_" . $batch_id . ".genotypes_1.txt";
    import_sql_file($f, "catalog_genotypes", 1);

} elsif ($stacks_type eq "population") {
    $f = "$in_path/batch_" . $batch_id . ".markers.tsv";
    import_sql_file($f, "markers", 1);

    # Skip count is one line per population plus one -- presumably the
    # per-population header lines of these files; TODO confirm format.
    $f = "$in_path/batch_" . $batch_id . ".sumstats.tsv";
    import_sql_file($f, "sumstats", scalar(keys %pops) + 1);

    $f = "$in_path/batch_" . $batch_id . ".hapstats.tsv";
    import_sql_file($f, "hapstats", scalar(keys %pops) + 1);

    #
    # Import the Fst files for every ordered pair of populations.
    #
    my $fst_cnt = 0;
    my (@keys, $m, $n);
    @keys = sort keys %pops;

    for ($m = 0; $m < scalar(@keys); $m++) {
        for ($n = 0; $n < scalar(@keys); $n++) {
            $f = "$in_path/batch_" . $batch_id . ".fst_" . $keys[$m] . "-" . $keys[$n] . ".tsv";
            # Bugfix: test the path we just built ($f); this previously
            # tested the unrelated $file variable, so existing Fst files
            # were silently skipped.
            if (-e $f) {
                import_sql_file($f, "fst", 1);
                $fst_cnt++;
            }
        }
    }
    print STDERR "Imported $fst_cnt SNP Fst file(s).\n";

    #
    # Import the Phi_st files for every ordered pair of populations.
    #
    $fst_cnt = 0;
    for ($m = 0; $m < scalar(@keys); $m++) {
        for ($n = 0; $n < scalar(@keys); $n++) {
            $f = "$in_path/batch_" . $batch_id . ".phistats_" . $keys[$m] . "-" . $keys[$n] . ".tsv";
            # Bugfix: same as above -- check $f, not $file.
            if (-e $f) {
                import_sql_file($f, "phist", 3);
                $fst_cnt++;
            }
        }
    }
    print STDERR "Imported $fst_cnt Haplotype Fst file(s).\n";
}
#
# First pass over the samples: load each sample's catalog matches file,
# processing samples in ascending sample-ID order.
#
$i = 1;
$cnt = scalar(@files);
foreach $file (sort {$sample_ids{$a} <=> $sample_ids{$b}} @files) {
print STDERR "Processing sample $i of $cnt\n";
# Prefer the uncompressed file; fall back to a gzipped copy.
$f = $in_path . "/$file" . ".matches.tsv";
if (-e $f) {
import_sql_file($f, "matches", 1);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".matches.tsv.gz";
import_gzsql_file($f, "matches", 1);
}
$i++;
}
#
# Second pass over the samples: register each sample in the samples table,
# then load its tags/snps/alleles files.
#
$i = 1;
foreach $file (sort {$sample_ids{$a} <=> $sample_ids{$b}} @files) {
print STDERR "Processing sample $i of $cnt\n";
#
# Pull out the sample ID and insert it into the database
#
$sample_id = $sample_ids{$file};
# In a map run, samples whose IDs appear in the catalog are parents and the
# rest are progeny; population runs label every sample 'sample'.
if ($stacks_type eq "map") {
$type = (grep(/^$sample_id$/, @parent_ids) > 0) ? 'parent' : 'progeny';
} else {
$type = "sample";
}
# @pop_ids was built in this same sample-ID order by parse_population_map().
$pop_id = shift(@pop_ids);
# NOTE(review): values are interpolated unescaped into the SQL; a file name
# containing a quote will break the statement.
if (!$dry_run) {
@results = `mysql --defaults-file=$cnf $db -e "INSERT INTO samples SET id=$sample_id, sample_id=$sample_id, batch_id=$batch_id, type='$type', file='$file', pop_id='$pop_id'"`;
}
print STDERR
"mysql --defaults-file=$cnf $db ",
"-e \"INSERT INTO samples SET id=$sample_id, sample_id=$sample_id, batch_id=$batch_id, type='$type', file='$file', pop_id='$pop_id'\"\n",
@results;
# Load the per-sample files, preferring uncompressed copies. -U (ignore_tags)
# suppresses only the unique_tags load.
$f = $in_path . "/$file" . ".tags.tsv";
if (-e $f) {
import_sql_file($f, "unique_tags", 1) if ($ignore_tags == 0);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".tags.tsv.gz";
import_gzsql_file($f, "unique_tags", 1) if ($ignore_tags == 0);
}
$f = $in_path . "/$file" . ".snps.tsv";
if (-e $f) {
import_sql_file($f, "snps", 1);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".snps.tsv.gz";
import_gzsql_file($f, "snps", 1);
}
$f = $in_path . "/$file" . ".alleles.tsv";
if (-e $f) {
import_sql_file($f, "alleles", 1);
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".alleles.tsv.gz";
import_gzsql_file($f, "alleles", 1);
}
$i++;
}
print STDERR "\nDon't forget to index your Stacks database -- run index_radtags.pl\n\n";
#
# parse_population_map($samples, $sample_ids, $pop_ids, $pops)
#
# Assign a population ID to every sample file. With no -M map, every sample
# goes into population 1. Otherwise each file prefix must appear in column
# one of the tab-separated map, with its population ID in column two.
# @$pop_ids is filled in the same ascending sample-ID order used when
# inserting samples; %$pops counts files per population. Dies on a malformed
# map or a sample missing from it.
#
sub parse_population_map {
    my ($samples, $sample_ids, $pop_ids, $pops) = @_;
    my ($fh, @parts, $line, %ids, $file, $path);

    if (length($popmap_path) == 0) {
        # No map supplied: place every sample in population 1.
        foreach $path (@{$samples}) {
            push(@{$pop_ids}, 1);
            $pops->{1}++;
        }
        return;
    }

    open($fh, "<$popmap_path") or die("Unable to open population map, '$popmap_path', $!\n");

    while ($line = <$fh>) {
        chomp $line;
        @parts = split(/\t/, $line);
        if (scalar(@parts) > 3) {
            die("Unable to parse population map, '$popmap_path' (map should contain no more than three columns).\n");
        }
        $ids{$parts[0]} = $parts[1];
    }

    foreach $file (sort {$sample_ids->{$a} <=> $sample_ids->{$b}} @{$samples}) {
        if (!defined($ids{$file})) {
            die("Unable to find '$file' in the population map, '$popmap_path'.\n");
        }
        push(@{$pop_ids}, $ids{$file});
        $pops->{$ids{$file}}++;
    }

    # Bugfix: report the population count through the $pops parameter rather
    # than the file-level %pops global (identical at the current call site,
    # but correct for any caller).
    print STDERR "Parsed population map: ", scalar(@{$samples}), " files in ", scalar(keys %{$pops}), " populations.\n";

    close($fh);
}
#
# extract_parental_ids($sample_cnt, $catalog, $parental_ids)
#
# Scan the catalog tags files for the sample IDs that contributed loci to
# the catalog, returning them through @$parental_ids. If -t did not force a
# pipeline type, classify the run: all samples in the catalog => 'population',
# otherwise => 'map'.
#
sub extract_parental_ids {
    my ($sample_cnt, $catalog, $parental_ids) = @_;
    my ($fh, $prefix, $path, $line, @parts, $tag_id, @tag_ids, $id, $tag, %ids);

    print STDERR "Scanning catalog for sample IDs...";

    # Bugfix: iterate the $catalog parameter; this previously walked the
    # file-level @catalog global, which only worked for the one call site.
    foreach $prefix (@{$catalog}) {
        $path = $in_path . "/" . $prefix . ".catalog.tags.tsv";
        if (-e $path) {
            open($fh, "<$path") or die("Unable to open catalog file: '$path', $!\n");
        } elsif (-e $path . ".gz") {
            open($fh, "gunzip -c " . $path . ".gz |") or die("Unable to open catalog file: '$path', $!\n");
        } else {
            # Bugfix: previously a missing file fell through and the loop
            # read from an undefined (or stale, previously opened) handle.
            print STDERR "Unable to locate catalog file '$path(.gz)', skipping.\n";
            next;
        }

        while ($line = <$fh>) {
            chomp $line;
            @parts = split(/\t/, $line);
            # Column 9 is read as a comma-separated list of "sampleid_tagid"
            # components merged into each catalog locus -- TODO confirm
            # against the current catalog.tags.tsv column layout.
            @tag_ids = split(/,/, $parts[8]);
            foreach $tag_id (@tag_ids) {
                ($id, $tag) = split(/_/, $tag_id);
                $ids{$id}++;
            }
        }
        close($fh);
    }

    @{$parental_ids} = keys %ids;

    #
    # Determine the type of pipeline run: either a 'map' or a 'population' type.
    # If all samples are parental, i.e. in the catalog, then this is a population type
    # otherwise, it is a map type.
    #
    if (length($stacks_type) == 0) {
        $stacks_type = (scalar(@{$parental_ids}) == $sample_cnt) ? "population" : "map";
    }

    print STDERR "done.\n";
}
# extract_sample_ids($files, $sample_ids)
#
# Map each file prefix to its numeric sample ID by reading the first data
# line (line 2, after the header) of its *.tags.tsv(.gz) file. Dies if a
# prefix has neither file form.
sub extract_sample_ids {
my ($files, $sample_ids) = @_;
my ($file, $f, $line, @results, @parts);
print STDERR "Collecting sample IDs from Stacks output files...";
foreach $file (@{$files}) {
$f = $in_path . "/$file" . ".tags.tsv";
if (-e $f) {
@results = `head -n 2 $f | tail -n 1`;
} elsif (-e $f . ".gz") {
$f = $in_path . "/$file" . ".tags.tsv.gz";
@results = `gunzip -c $f | head -n 2 | tail -n 1`;
} else {
die("Unable to find file $f\n");
}
chomp $results[0];
@parts = split(/\t/, $results[0]);
#
# The sample ID is taken from the second field ($parts[1]) of the
# *.tags.tsv file. NOTE(review): the original comment claimed the first
# column -- confirm which column holds the sample ID in the current
# tags.tsv format.
#
$sample_ids->{$file} = $parts[1];
}
print STDERR "done.\n";
}
#
# import_sql_file($file, $table, $skip_lines)
#
# Load a plain-text TSV file into $table via LOAD DATA LOCAL INFILE,
# skipping the first $skip_lines header lines. The command is echoed to
# STDERR either way so a dry run (-d) shows what would be executed.
#
sub import_sql_file {
    my ($file, $table, $skip_lines) = @_;
    my @results;

    if (!-e $file) {
        print STDERR "File '$file' does not exist.\n";
        return;
    }

    # Bugfix: default to the empty string so the interpolations below are
    # well-defined when no header lines are skipped (previously undef).
    my $ignore = ($skip_lines > 0) ? " IGNORE $skip_lines LINES" : "";

    # NOTE(review): $file and $table are interpolated unescaped into a shell
    # command and SQL string; paths containing quotes or shell
    # metacharacters will break this.
    if (!$dry_run) {
        @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$file' INTO TABLE $table$ignore"`;
    }

    print STDERR
        "mysql --defaults-file=$cnf $db ",
        "-e \"LOAD DATA LOCAL INFILE '$file' INTO TABLE $table$ignore\"\n",
        @results;
}
#
# import_gzsql_file($file, $table, $skip_lines)
#
# Load a gzipped TSV file into $table by streaming gunzip output through a
# named pipe, since LOAD DATA cannot read gzipped input directly. Skips the
# first $skip_lines header lines. The command is echoed to STDERR either way
# so a dry run (-d) shows what would be executed.
#
sub import_gzsql_file {
    my ($file, $table, $skip_lines) = @_;
    my @results;

    if (!-e $file) {
        print STDERR "File '$file' does not exist.\n";
        return;
    }

    # Bugfix: default to the empty string so the interpolations below are
    # well-defined when no header lines are skipped (previously undef).
    my $ignore = ($skip_lines > 0) ? "IGNORE $skip_lines LINES" : "";

    #
    # Get a temporary file name and create a named pipe.
    # NOTE(review): mktemp() only reserves a name; there is a small race
    # between choosing it and mkfifo() creating the pipe.
    #
    my $tmpdir = File::Spec->tmpdir();
    my $named_pipe = mktemp($tmpdir . "/denovo_map_XXXXXX");
    if ($dry_run == 0) {
        mkfifo($named_pipe, 0700) || die("Unable to create named pipe for loading gzipped data: $named_pipe, $!");
    }
    print STDERR "Streaming $file into named pipe $named_pipe.\n";

    #
    # Dump our gzipped data onto the named pipe; the backgrounded gunzip
    # blocks until mysql opens the pipe for reading.
    #
    system("gunzip -c $file > $named_pipe &") if ($dry_run == 0);

    @results = `mysql --defaults-file=$cnf $db -e "LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore"` if ($dry_run == 0);

    print STDERR "mysql --defaults-file=$cnf $db -e \"LOAD DATA LOCAL INFILE '$named_pipe' INTO TABLE $table $ignore\"\n", @results;

    #
    # Remove the pipe.
    #
    unlink($named_pipe) if ($dry_run == 0);
}
#
# build_file_list($files, $catalog_files)
#
# Collect the prefixes of all per-sample *.tags.tsv(.gz) files in $in_path
# (excluding catalog files) into @$files and, when -c was given, the catalog
# prefixes into @$catalog_files. An optional white list (-W) restricts both
# sets. Calls usage() (which exits) when nothing is found.
#
sub build_file_list {
    my ($files, $catalog_files) = @_;
    my (@wl, @ls, $line, $prefix);

    # Load a white list of files to process if it is supplied.
    if (length($white_list) > 0) {
        load_white_list(\@wl);
    }

    @ls = `ls -1 $in_path/*.tags.tsv* 2> /dev/null`;

    if (scalar(@ls) == 0) {
        print STDERR "Unable to locate any input files to process within '$in_path'\n";
        usage();
    }

    foreach $line (@ls) {
        chomp $line;

        # Bugfix: \Q...\E quotes regex metacharacters so paths containing
        # characters such as '+' no longer break prefix extraction.
        if ($line =~ /\.tags\.tsv\.gz$/) {
            ($prefix) = ($line =~ /\Q$in_path\E\/(.+)\.tags\.tsv\.gz/);
        } else {
            ($prefix) = ($line =~ /\Q$in_path\E\/(.+)\.tags\.tsv/);
        }

        # Catalog files are collected separately below.
        next if ($prefix =~ /catalog/);

        if (scalar(@wl) > 0) {
            next if (!grep(/^\Q$prefix\E$/, @wl));
        }

        push(@{$files}, $prefix);
    }

    if ($catalog > 0) {
        @ls = `ls -1 $in_path/*.catalog.tags.tsv* 2> /dev/null`;

        if (scalar(@ls) == 0) {
            print STDERR "Unable to locate any catalog input files to process within '$in_path'\n";
            usage();
        }

        foreach $line (@ls) {
            chomp $line;

            if ($line =~ /\.catalog\.tags\.tsv\.gz$/) {
                ($prefix) = ($line =~ /\Q$in_path\E\/(.+)\.catalog\.tags\.tsv\.gz/);
            } else {
                ($prefix) = ($line =~ /\Q$in_path\E\/(.+)\.catalog\.tags\.tsv/);
            }

            if (scalar(@wl) > 0) {
                next if (!grep(/^\Q$prefix\E$/, @wl));
            }

            push(@{$catalog_files}, $prefix);
        }
    }
}
#
# load_white_list($wl)
#
# Read the -W white list file into @$wl, one file prefix per line; blank
# lines are ignored. Dies if the file cannot be opened.
#
sub load_white_list {
    my ($wl) = @_;

    open(my $fh, "<", $white_list)
        or die("Unable to open white list file '$white_list': $!\n");

    while (my $entry = <$fh>) {
        chomp $entry;
        push(@{$wl}, $entry) if (length($entry) > 0);
    }

    close($fh);
}
#
# parse_command_line()
#
# Consume @ARGV, setting the file-level option globals. Options that take a
# value consume the following token. -v and -h exit immediately; an unknown
# option or a missing -D database prints usage() and exits.
#
sub parse_command_line {
    while (@ARGV) {
        my $opt = shift @ARGV;

        if    ($opt eq "-p") { $in_path     = shift @ARGV; }
        elsif ($opt eq "-D") { $db          = shift @ARGV; }
        elsif ($opt eq "-c") { $catalog++; }
        elsif ($opt eq "-B") { $batch++; }
        elsif ($opt eq "-b") { $batch_id    = shift @ARGV; }
        elsif ($opt eq "-e") { $desc        = shift @ARGV; }
        elsif ($opt eq "-a") { $date        = shift @ARGV; }
        elsif ($opt eq "-W") { $white_list  = shift @ARGV; }
        elsif ($opt eq "-t") { $stacks_type = shift @ARGV; }
        elsif ($opt eq "-M") { $popmap_path = shift @ARGV; }
        elsif ($opt eq "-U") { $ignore_tags++; }
        elsif ($opt eq "-d") { $dry_run++; }
        elsif ($opt eq "-v") { version(); exit(); }
        elsif ($opt eq "-h") { usage(); }
        else {
            print STDERR "Unknown command line option: '$opt'\n";
            usage();
        }
    }

    # Normalize a trailing slash on the input path.
    $in_path = substr($in_path, 0, -1) if (substr($in_path, -1) eq "/");

    if (length($db) == 0) {
        print STDERR "You must specify a database.\n";
        usage();
    }
}
# Print the program name and build-time version string to STDERR.
sub version {
    print STDERR "load_radtags.pl " . stacks_version . "\n";
}
#
# usage()
#
# Print the version banner and option summary to STDERR, then exit.
# Bugfix: the synopsis listed [-d] twice and showed -t without its argument;
# also fixed the "number or indiviuduals" typo in the -t description.
#
sub usage {
    version();

    print STDERR <<EOQ;
load_radtags.pl -D db -p path -b batch_id [-B -e desc] [-c] [-M pop_map] [-t type] [-W path] [-U] [-d] [-h]
    D: Database to load data into.
    p: Path to input files.
    b: Batch ID.
    M: if you have analyzed several populations, specify a population map.
    c: Load the catalog into the database.
    B: Load information into batch table.
    e: batch dEscription.
    d: perform a dry run. Do not actually load any data, just print what would be executed.
    W: only load file found on this white list.
    U: do not load stacks to unique_tags table to save database space.
    t: pipeline type (either 'map' or 'population'), load_radtags.pl will guess based on the number of individuals used to build the catalog.
    h: display this help message.
EOQ
    exit(0);
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -47,5 +47,5 @@ exec &> >(tee stacks-gdb.log)
gdb --quiet --args "$@" <<EOF
catch throw
run
backtrace
backtrace full
EOF
This diff is collapsed.