Commits on Source (7)
SINGULARITY_VER=2.5.1
SINGULARITY_VER=3.3.0
......@@ -27,13 +27,16 @@ jobs:
# TODO only install if singularity is not yet present
# if type singularity > /dev/null; then exit 0; fi
source .circleci/common.sh
sudo apt-get update; sudo apt-get install squashfs-tools libarchive-dev
wget https://github.com/singularityware/singularity/releases/download/$SINGULARITY_VER/singularity-$SINGULARITY_VER.tar.gz
tar xvf singularity-$SINGULARITY_VER.tar.gz
cd singularity-$SINGULARITY_VER
./configure --prefix=/usr/local --sysconfdir=/etc
make
sudo make install
sudo add-apt-repository ppa:gophers/archive
sudo apt-get update
sudo apt-get install build-essential libssl-dev uuid-dev libgpgme11-dev libseccomp-dev wget pkg-config squashfs-tools libarchive-dev golang-1.11
export PATH=/usr/lib/go-1.11/bin:$PATH
wget https://github.com/sylabs/singularity/releases/download/v${SINGULARITY_VER}/singularity-${SINGULARITY_VER}.tar.gz
tar -xvf singularity-$SINGULARITY_VER.tar.gz
cd singularity
./mconfig
make -C builddir
sudo make -C builddir install
- run:
name: Setup Snakemake
command: |
......
......@@ -17,3 +17,6 @@ dist/
.ipynb*
.ropeproject
.test*
tests/test*/*
playground/*
tutorial/*
\ No newline at end of file
[5.6.0] - 2019-09-06
====================
Changed
-------
- Fix compatibility with latest singularity versions.
- Various bug fixes (e.g. in cluster error handling, remote providers, kubernetes backend).
Added
-----
- Add --default-resources flag, that allows to define default resources for jobs (e.g. mem_mb, disk_mb), see `docs <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#resources>`_.
- Accept ``--dry-run`` as a synonym of ``--dryrun``. Other Snakemake options are similarly hyphenated, so the documentation now refers to ``--dry-run``, but both spellings (and also ``-n``) remain accepted.
[5.5.4] - 2019-07-21
====================
Changed
......
......@@ -21,7 +21,7 @@ Missing (optional) dependencies:
Missing (optional) Python dependencies:
- moto
- moto https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=777089
- google.cloud (google-cloud-sdk)
- ftputil
- pysftp
......
snakemake (5.6.0-1) unstable; urgency=medium
* Team upload.
* New upstream version
* 0013-remove-duplicate-keyword-argument.patch removed, applied upstream
* Add AutoPkgTests
-- Michael R. Crusoe <michael.crusoe@gmail.com> Fri, 13 Sep 2019 16:15:23 +0200
snakemake (5.5.4-2) unstable; urgency=medium
* set $HOME to fix build on sbuild
......
From 6d013348a3501b6c183438cfb44bf78704128925 Mon Sep 17 00:00:00 2001
From: Alistair Miles <alimanfoo@googlemail.com>
Date: Mon, 29 Jul 2019 14:53:04 +0000
Subject: [PATCH] Merged in
alimanfoo/snakemake/Alistair-Miles/remove-duplicate-keyword-argument-1563308166092
(pull request #397)
remove duplicate keyword argument
---
snakemake/remote/gfal.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- snakemake.orig/snakemake/remote/gfal.py
+++ snakemake/snakemake/remote/gfal.py
@@ -26,7 +26,7 @@
supports_default = True
allows_directories = True
- def __init__(self, *args, keep_local=False, stay_on_remote=False, is_default=False, stay_on_remote=False, retry=5, **kwargs):
+ def __init__(self, *args, keep_local=False, stay_on_remote=False, is_default=False, retry=5, **kwargs):
super(RemoteProvider, self).__init__(*args, keep_local=keep_local, stay_on_remote=stay_on_remote, is_default=is_default, **kwargs)
self.retry = retry
......@@ -10,5 +10,4 @@
# 0010-skip-test-without-rmarkdown.patch
0011-fix-privacy-breach.patch
0012-reproducible-build.patch
0013-remove-duplicate-keyword-argument.patch
boto3_is_just_boto
......@@ -7,7 +7,7 @@ export HOME=$(CURDIR)/fakehome
export PYBUILD_NAME=snakemake
export PYBUILD_DESTDIR_python3=debian/snakemake
export PYBUILD_BEFORE_TEST_python3=chmod +x {dir}/bin/snakemake; cp -r {dir}/bin {dir}/tests {build_dir}
export PYBUILD_TEST_ARGS=python{version} -m pytest tests/test*.py -v -k 'not report and not ancient and not test_script and not default_remote and not issue635 and not convert_to_cwl and not issue1083 and not issue1092 and not issue1093'
export PYBUILD_TEST_ARGS=python{version} -m pytest tests/test*.py -v -k 'not report and not ancient and not test_script and not default_remote and not issue635 and not convert_to_cwl and not issue1083 and not issue1092 and not issue1093 and not test_remote and not test_default_resources'
# test_report
# test_ancient
......@@ -17,6 +17,7 @@ export PYBUILD_TEST_ARGS=python{version} -m pytest tests/test*.py -v -k 'not rep
# test_convert_to_cwl tries to build a singularity format software image from docker://quay.io/snakemake/snakemake:v5.5.4
# test_issue1083 tries to build a singularity format software image from docker://bash
# test_issue1093 fails due to conda usage; commenting that out and installing bwa produces a different ordering than desired
# test_default_resources and test_remote need moto to be packaged https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=777089
export PYBUILD_AFTER_TEST_python3=rm -fr {build_dir}/bin {build_dir}/tests
......
Tests: run-unit-test
Depends: @
Restrictions: allow-stderr
#!/bin/bash
set -e
pkg=snakemake
ROOT=$(pwd)
if [ "${AUTOPKGTEST_TMP}" = "" ] ; then
AUTOPKGTEST_TMP=$(mktemp -d /tmp/${pkg}-test.XXXXXX)
# Double quoting below is intentional: it expands the temporary directory
# variable now, rather than when the trap fires.
# shellcheck disable=SC2064
trap "rm -rf ${AUTOPKGTEST_TMP}" 0 INT QUIT ABRT PIPE TERM
fi
cd "${AUTOPKGTEST_TMP}"
python3 -m pytest ${ROOT}/tests/test*.py -v -k 'not report and not ancient and not test_script and not default_remote and not issue635 and not convert_to_cwl and not issue1083 and not issue1092 and not issue1093 and not test_remote and not test_default_resources and not test_singularity and not test_singularity_conda and not test_cwl_singularity and not test_cwl'
......@@ -172,7 +172,7 @@ The cluster command can be decorated with job specific information, e.g.
$ snakemake --cluster "qsub {threads}"
Thereby, all keywords of a rule are allowed (e.g. params, input, output, threads, priority, ...).
Thereby, all keywords of a rule are allowed (e.g. rulename, params, input, output, threads, priority, ...).
For example, you could encode the expected running time into params:
.. code-block:: python
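    # Hypothetical illustration (the original example is truncated in this diff):
    # expose an expected runtime via params so the cluster command can use it,
    # e.g. snakemake --cluster "qsub -l walltime={params.runtime}".
    rule long_running_step:
        input:
            "data/{sample}.txt"
        output:
            "results/{sample}.txt"
        params:
            runtime="04:00:00"
        shell:
            "slow_tool {input} > {output}"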
......@@ -204,7 +204,7 @@ Else, the arguments will be interpreted as part of the normal Snakemake argument
Job Properties
~~~~~~~~~~~~~~
When executing a workflow on a cluster using the ``--cluster`` parameter (see below), Snakemake creates a job script for each job to execute. This script is then invoked using the provided cluster submission command (e.g. ``qsub``). Sometimes you want to provide a custom wrapper for the cluster submission command that decides about additional parameters. As this might be based on properties of the job, Snakemake stores the job properties (e.g. rule name, threads, input files, params etc.) as JSON inside the job script. For convenience, there exists a parser function `snakemake.utils.read_job_properties` that can be used to access the properties. The following shows an example job submission wrapper:
When executing a workflow on a cluster using the ``--cluster`` parameter (see below), Snakemake creates a job script for each job to execute. This script is then invoked using the provided cluster submission command (e.g. ``qsub``). Sometimes you want to provide a custom wrapper for the cluster submission command that decides about additional parameters. As this might be based on properties of the job, Snakemake stores the job properties (e.g. name, rulename, threads, input, output, params etc.) as JSON inside the job script (for group jobs, the rulename will be "GROUP", otherwise it will be the same as the job name). For convenience, there exists a parser function `snakemake.utils.read_job_properties` that can be used to access the properties. The following shows an example job submission wrapper:
.. code-block:: python
......@@ -228,6 +228,8 @@ When executing a workflow on a cluster using the ``--cluster`` parameter (see be
os.system("qsub -t {threads} {script}".format(threads=threads, script=jobscript))
.. _profiles:
--------
Profiles
--------
......
......@@ -493,7 +493,7 @@ To remove all files created by snakemake as output files to start from scratch,
Only files that are output of snakemake rules will be removed, not those that serve as primary inputs to the workflow.
Note that this will only affect the files involved in reaching the specified target(s).
It is strongly advised to first run together with ``--dryrun`` to list the files that would be removed without actually deleting anything.
It is strongly advised to first run together with ``--dry-run`` to list the files that would be removed without actually deleting anything.
The flag ``--delete-temp-output`` can be used in a similar manner to only delete files flagged as temporary.
......@@ -509,7 +509,7 @@ It is recommended to use the script directive instead (see :ref:`snakefiles-exte
My workflow is very large, how do I stop Snakemake from printing all this rule/job information in a dry-run?
------------------------------------------------------------------------------------------------------------
Indeed, the information for each individual job can slow down a dryrun if there are tens of thousands of jobs.
Indeed, the information for each individual job can slow down a dry-run if there are tens of thousands of jobs.
If you are just interested in the final summary, you can use the ``--quiet`` flag to suppress this.
.. code-block:: console
......
......@@ -235,12 +235,19 @@ If limits for the resources are given via the command line, e.g.
$ snakemake --resources mem_mb=100
the scheduler will ensure that the given resources are not exceeded by running jobs.
If no limits are given, the resources are ignored.
If no limits are given, the resources are ignored in local execution.
In cluster or cloud execution, resources are always passed to the backend, even if ``--resources`` is not specified.
Apart from making Snakemake aware of hybrid-computing architectures (e.g. with a limited number of additional devices like GPUs), this allows you to control scheduling in various ways, e.g. to limit IO-heavy jobs by assigning an artificial IO resource to them and limiting it via the ``--resources`` flag.
Resources must be ``int`` values.
Note that you are free to choose any names for the given resources.
When defining memory constraints, it is however advised to use ``mem_mb``, because there are
Snakemake execution modes that make use of this information, (e.g., when using :ref:`kubernetes`).
There are, however, two **standard resources** for memory and disk usage: ``mem_mb`` and ``disk_mb``.
When defining memory constraints, it is advised to use ``mem_mb``, because some execution modes
make direct use of this information (e.g., when using :ref:`Kubernetes <kubernetes>`).
Since it would be cumbersome to define them for every rule, you can set default values at the terminal or in a :ref:`profile <profiles>`.
This works via the command line flag ``--default-resources``, see ``snakemake --help`` for more information.
If those resource definitions are mandatory for a certain execution mode, Snakemake will fail with a hint if they are missing.
Any resource definitions inside a rule override what has been defined with ``--default-resources``.
Resources can also be callables that return ``int`` values.
The signature of the callable has to be ``callable(wildcards [, input] [, threads] [, attempt])`` (``input``, ``threads``, and ``attempt`` are optional parameters).
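For illustration, here is a minimal sketch (rule and file names are hypothetical) combining a plain integer resource with a callable one that scales with the retry attempt:

.. code-block:: python

    rule sort_reads:
        input:
            "mapped/{sample}.bam"
        output:
            "mapped/{sample}.sorted.bam"
        threads: 4
        resources:
            # plain integer; overrides any value set via --default-resources
            disk_mb=2000,
            # callable with the signature described above: request more memory on each retry
            mem_mb=lambda wildcards, attempt: 1000 * attempt
        shell:
            "samtools sort -@ {threads} -o {output} {input}"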
......@@ -298,7 +305,7 @@ Snakemake allows rules to specify numeric priorities:
By default, each rule has a priority of 0. Any rule that specifies a higher priority will be preferred by the scheduler over all rules that are ready to execute at the same time but do not have at least the same priority.
Furthermore, the ``--prioritize`` or ``-P`` command line flag allows you to specify files (or rules) that shall be created with highest priority during the workflow execution. This means that the scheduler will assign the specified target and all its dependencies the highest priority, such that the target is finished as soon as possible.
The ``--dryrun`` or ``-n`` option allows you to see the scheduling plan including the assigned priorities.
The ``--dry-run`` (equivalently ``--dryrun``) or ``-n`` option allows you to see the scheduling plan including the assigned priorities.
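For example, a hypothetical rule could be given a higher priority directly in the Snakefile:

.. code-block:: python

    rule summarize_calls:
        input:
            "calls/all.vcf"
        output:
            "reports/summary.txt"
        # preferred by the scheduler over ready jobs of lower priority (default is 0)
        priority: 50
        shell:
            "wc -l {input} > {output}"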
......
......@@ -47,7 +47,7 @@ In the following, we will introduce the Snakemake syntax by creating an example
The workflow comes from the domain of genome analysis.
It maps sequencing reads to a reference genome and calls variants on the mapped reads.
The tutorial does not require you to know what this is about.
Nevertheless, we provide some background in the following.
Nevertheless, we provide some background in the following paragraph.
.. _tutorial-background:
......@@ -122,7 +122,7 @@ By executing
$ snakemake -np mapped_reads/A.bam
in the working directory containing the Snakefile, we tell Snakemake to generate the target file ``mapped_reads/A.bam``.
Since we used the ``-n`` (or ``--dryrun``) flag, Snakemake will only show the execution plan instead of actually perform the steps.
Since we used the ``-n`` (or ``--dry-run``) flag, Snakemake will only show the execution plan instead of actually performing the steps.
The ``-p`` flag instructs Snakemake to also print the resulting shell command for illustration.
To generate the target files, **Snakemake applies the rules given in the Snakefile in a top-down way**.
The application of a rule to generate a set of output files is called **job**.
......@@ -412,7 +412,7 @@ Create the file ``scripts/plot-quals.py``, with the following content:
Although there are other strategies to invoke separate scripts from your workflow
(e.g., invoking them via shell commands), the benefit of this is obvious:
the script logic is separated from the workflow logic (and can be even shared between workflows),
but **boilerplate code like the parsing of command line arguments in unnecessary**.
but **boilerplate code like the parsing of command line arguments is unnecessary**.
Apart from Python scripts, it is also possible to use R scripts. In R scripts,
an S4 object named ``snakemake`` analog to the Python case above is available and
......
Short tutorial
==============
Here we provide a short tutorial that guides you through the main features of Snakemake.
Note that it is not suited for learning Snakemake from scratch, but rather gives a first impression.
To really learn Snakemake (starting from something simple, and extending towards advanced features), use the main :ref:`tutorial`.
This document shows all steps performed in the official `Snakemake live demo <https://youtu.be/hPrXcUUp70Y>`_,
such that it becomes possible to follow them at your own pace.
Solutions to each step can be found at the bottom of this document.
The examples presented in this tutorial come from bioinformatics.
However, Snakemake is a general-purpose workflow management system for any discipline.
For an explanation of the steps you will perform here, have a look at :ref:`tutorial-background`.
More thorough explanations are provided in the full :ref:`tutorial`.
Prerequisites
-------------
First, install Snakemake via Conda, as outlined in :ref:`conda-install`.
The minimal version of Snakemake is sufficient for this demo.
Second, download and unpack the test data needed for this example from
`here <https://bitbucket.org/snakemake/snakemake-tutorial/get/v5.2.3.tar.bz2>`_,
e.g., via
::
mkdir snakemake-demo
cd snakemake-demo
wget https://bitbucket.org/snakemake/snakemake-tutorial/get/v5.2.3.tar.bz2
tar --wildcards -xf v5.2.3.tar.bz2 --strip 1 "*/data"
Step 1
------
First, create an empty workflow in the current directory with:
::
touch Snakefile
Once a Snakefile is present, you can perform a dry run of Snakemake
with:
::
snakemake -n
Since the Snakefile is empty, it will report that nothing has to be
done. In the next steps, we will gradually fill the Snakefile with an
example analysis workflow.
Step 2
------
The data folder in your working directory looks as follows:
::
data
├── genome.fa
├── genome.fa.amb
├── genome.fa.ann
├── genome.fa.bwt
├── genome.fa.fai
├── genome.fa.pac
├── genome.fa.sa
└── samples
├── A.fastq
├── B.fastq
└── C.fastq
You will create a workflow that maps the sequencing samples in the
``data/samples`` folder to the reference genome ``data/genome.fa``.
Then, you will call genomic variants over the mapped samples, and create
an example plot.
First, create a rule called ``bwa``, with input files
- ``data/genome.fa``
- ``data/samples/A.fastq``
and output file
- ``mapped/A.bam``
To generate output from input, use the shell command
.. code:: python
"bwa mem {input} | samtools view -Sb - > {output}"
Providing a shell command is not enough to run your workflow on an
unprepared system. For reproducibility, you also have to provide the
required software stack and define the desired version. This can be done
with the `Conda package manager <https://conda.io>`__, which is directly
integrated with Snakemake: add a directive
``conda: "envs/mapping.yaml"`` that points to a `Conda environment
definition <https://conda.io/docs/user-guide/tasks/manage-environments.html?highlight=environment#creating-an-environment-file-manually>`__,
with the following content
.. code:: yaml
channels:
- bioconda
- conda-forge
dependencies:
- bwa =0.7.17
- samtools =1.9
Upon execution, Snakemake will automatically create that environment,
and execute the shell command within.
Now, test your workflow by simulating the creation of the file
``mapped/A.bam`` via
::
snakemake --use-conda -n mapped/A.bam
to perform a dry-run and
::
snakemake --use-conda mapped/A.bam
to perform the actual execution.
Step 3
------
Now, generalize the rule ``bwa`` by replacing the concrete sample name
``A`` with a wildcard ``{sample}`` in the input and output files of the rule
``bwa``. This way, Snakemake can apply the rule to map any of the three
available samples to the reference genome.
Test this by creating the file ``mapped/B.bam``.
Step 4
------
Next, create a rule ``sort`` that sorts the obtained ``.bam`` file by
genomic coordinate. The rule should have the input file
- ``mapped/{sample}.bam``
and the output file
- ``mapped/{sample}.sorted.bam``
and uses the shell command
::
samtools sort -o {output} {input}
to perform the sorting. Moreover, use the same ``conda:`` directive as
for the previous rule.
Test your workflow with
::
snakemake --use-conda -n mapped/A.sorted.bam
and
::
snakemake --use-conda mapped/A.sorted.bam
Step 5
------
Now, we aggregate over all samples to perform a joint calling of genomic
variants. First, we define a variable
.. code:: python
samples = ["A", "B", "C"]
at the top of the ``Snakefile``. This serves as a definition of the
samples over which we want to aggregate. In real life, you would
want to use an external sample sheet or a `config
file <http://snakemake.readthedocs.io/en/stable/tutorial/advanced.html#step-2-config-files>`__
for things like this.
For aggregation over many files, Snakemake provides the helper function
``expand`` (see `the
docs <http://snakemake.readthedocs.io/en/stable/tutorial/basics.html#step-5-calling-genomic-variants>`__).
Create a rule ``call`` with input files
- ``fa="data/genome.fa"``
- ``bam=expand("mapped/{sample}.sorted.bam", sample=samples)``
output file
- ``"calls/all.vcf"``
and shell command
::
samtools mpileup -g -f {input.fa} {input.bam} | bcftools call -mv - > {output}
Further, define a new conda environment file with the following content:
.. code:: yaml
channels:
- bioconda
- conda-forge
dependencies:
- bcftools =1.9
- samtools =1.9
Step 6
------
Finally, we strive to calculate some exemplary statistics. This time, we
don’t use a shell command, but rather employ Snakemake’s ability to
integrate with scripting languages like R and Python.
First, we create a rule ``stats`` with input file
- ``"calls/all.vcf"``
and output file
- ``"plots/quals.svg"``.
Instead of a shell command, we write
.. code:: python
script:
"scripts/plot-quals.py"
and create the corresponding script and its containing folder in our
working directory with
::
mkdir scripts
touch scripts/plot-quals.py
We open the script in the editor and add the following content
.. code:: python
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from pysam import VariantFile
quals = [record.qual for record in VariantFile(snakemake.input[0])]
plt.hist(quals)
plt.savefig(snakemake.output[0])
As you can see, instead of writing a command line parser for passing
parameters like input and output files, you have direct access to the
properties of the rule via a magic ``snakemake`` object, which Snakemake
automatically inserts into the script before executing the rule.
Finally, we have to define a conda environment for the rule, say
``envs/stats.yaml``, that provides the required Python packages to
execute the script:
.. code:: yaml
channels:
- bioconda
- conda-forge
dependencies:
- pysam =0.15
- matplotlib =3.1
- python =3.7
Make sure to test your workflow with
::
snakemake --use-conda plots/quals.svg
Step 7
------
So far, we have always specified a target file at the command line when
invoking Snakemake. When no target file is specified, Snakemake tries to
execute the first rule in the ``Snakefile``. We can use this property to
define default target files.
At the top of your ``Snakefile`` define a rule ``all``, with input files
- ``"calls/all.vcf"``
- ``"plots/quals.svg"``
and neither a shell command nor output files. This rule simply serves as
an indicator of what shall be collected as results.
Step 8
------
As a last step, we strive to annotate our workflow with some additional
information.
Automatic reports
~~~~~~~~~~~~~~~~~
Snakemake can automatically create HTML reports with
::
snakemake --report report.html
Such a report contains runtime statistics, a visualization of the
workflow topology, used software and data provenance information.
In addition, you can mark any output file generated in your workflow for
inclusion into the report. It will be encoded directly into the report,
such that it can be, e.g., emailed as a self-contained document. The
reader (e.g., a collaborator of yours) can at any time download the
enclosed results from the report for further use, e.g., in a manuscript
you write together. In this example, please mark the output file
``"plots/quals.svg"`` for inclusion by replacing it with
``report("plots/quals.svg", caption="report/calling.rst")`` and adding a
file ``report/calling.rst``, containing some description of the output
file. This description will be presented as caption in the resulting
report.
Threads
~~~~~~~
The first rule ``bwa`` can in theory use multiple threads. You can make
Snakemake aware of this, such that the information can be used for
scheduling. Add a directive ``threads: 8`` to the rule and alter the
shell command to
::
bwa mem -t {threads} {input} | samtools view -Sb - > {output}
This passes the threads defined in the rule as a command line argument
to the ``bwa`` process.
Temporary files
~~~~~~~~~~~~~~~
The output of the ``bwa`` rule becomes superfluous once the sorted
version of the ``.bam`` file is generated by the rule ``sort``.
Snakemake can automatically delete the superfluous output once it is not
needed anymore. For this, mark the output as temporary by replacing
``"mapped/{sample}.bam"`` in the rule ``bwa`` with
``temp("mapped/{sample}.bam")``.
Solutions
---------
Only read this if you have a problem with one of the steps.
.. _step-2-1:
Step 2
~~~~~~
The rule should look like this:
.. code:: python
rule bwa:
input:
"data/genome.fa",
"data/samples/A.fastq"
output:
"mapped/A.bam"
conda:
"envs/mapping.yaml"
shell:
"bwa mem {input} | samtools view -Sb - > {output}"
.. _step-3-1:
Step 3
~~~~~~
The rule should look like this:
.. code:: python
rule bwa:
input:
"data/genome.fa",
"data/samples/{sample}.fastq"
output:
"mapped/{sample}.bam"
conda:
"envs/mapping.yaml"
shell:
"bwa mem {input} | samtools view -Sb - > {output}"
.. _step-4-1:
Step 4
~~~~~~
The rule should look like this:
.. code:: python
rule sort:
input:
"mapped/{sample}.bam"
output:
"mapped/{sample}.sorted.bam"
conda:
"envs/mapping.yaml"
shell:
"samtools sort -o {output} {input}"
.. _step-5-1:
Step 5
~~~~~~
The rule should look like this:
.. code:: python
samples = ["A", "B", "C"]
rule call:
input:
fa="data/genome.fa",
bam=expand("mapped/{sample}.sorted.bam", sample=samples)
output:
"calls/all.vcf"
conda:
"envs/calling.yaml"
shell:
"samtools mpileup -g -f {input.fa} {input.bam} | "
"bcftools call -mv - > {output}"
.. _step-6-1:
Step 6
~~~~~~
The rule should look like this:
.. code:: python
rule stats:
input:
"calls/all.vcf"
output:
"plots/quals.svg"
conda:
"envs/stats.yaml"
script:
"scripts/plot-quals.py"
.. _step-7-1:
Step 7
~~~~~~
The rule should look like this:
.. code:: python
rule all:
input:
"calls/all.vcf",
"plots/quals.svg"
It has to appear as first rule in the ``Snakefile``.
.. _step-8-1:
Step 8
~~~~~~
The complete workflow should look like this:
.. code:: python
samples = ["A", "B"]
rule all:
input:
"calls/all.vcf",
"plots/quals.svg"
rule bwa:
input:
"data/genome.fa",
"data/samples/{sample}.fastq"
output:
temp("mapped/{sample}.bam")
conda:
"envs/mapping.yaml"
threads: 8
shell:
"bwa mem -t {threads} {input} | samtools view -Sb - > {output}"
rule sort:
input:
"mapped/{sample}.bam"
output:
"mapped/{sample}.sorted.bam"
conda:
"envs/mapping.yaml"
shell:
"samtools sort -o {output} {input}"
rule call:
input:
fa="data/genome.fa",
bam=expand("mapped/{sample}.sorted.bam", sample=samples)
output:
"calls/all.vcf"
conda:
"envs/calling.yaml"
shell:
"samtools mpileup -g -f {input.fa} {input.bam} | "
"bcftools call -mv - > {output}"
rule stats:
input:
"calls/all.vcf"
output:
report("plots/quals.svg", caption="report/calling.rst")
conda:
"envs/stats.yaml"
script:
"scripts/plot-quals.py"
......@@ -32,3 +32,4 @@ dependencies:
- xorg-libxpm
- gitpython
- pygments
- imagemagick
" Vim syntax file
" Language: Snakemake (extended from python.vim)
" Maintainer: Jay Hesselberth (jay.hesselberth@gmail.com)
" Last Change: 2016 Jan 23
" Last Change: 2019 Jul 26
"
" Usage
"
......@@ -43,10 +43,11 @@ source $VIMRUNTIME/syntax/python.vim
" singularity = "singularity" ":" stringliteral
" conda = "conda" ":" stringliteral
" shadow = "shadow" ":" stringliteral
" group = "group" ":" stringliteral
syn keyword pythonStatement include workdir onsuccess onerror
syn keyword pythonStatement ruleorder localrules configfile
syn keyword pythonStatement ruleorder localrules configfile group
syn keyword pythonStatement touch protected temp wrapper conda shadow
syn keyword pythonStatement input output params message threads resources singularity
syn keyword pythonStatement version run shell benchmark snakefile log script
......
......@@ -106,7 +106,7 @@ code > span.er { color: #ff0000; font-weight: bold; }
<p>A Snakemake rule has a name (here <code>bwa_map</code>) and a number of directives, here <code>input</code>, <code>output</code> and <code>shell</code>. The <code>input</code> and <code>output</code> directives are followed by lists of files that are expected to be used or created by the rule. In the simplest case, these are just explicit Python strings. The <code>shell</code> directive is followed by a Python string containing the shell command to execute. In the shell command string, we can refer to elements of the rule via braces notation (similar to the Python format function). Here, we refer to the output file by specifying <code>{output}</code> and to the input files by specifying <code>{input}</code>. Since the rule has multiple input files, Snakemake will concatenate them separated by a whitespace. In other words, Snakemake will replace <code>{input}</code> with <code>data/genome.fa data/samples/A.fastq</code> before executing the command. The shell command invokes <code>bwa mem</code> with reference genome and reads, and pipes the output into <code>samtools</code> which creates a compressed BAM file containing the alignments. The output of <code>samtools</code> is piped into the output file defined by the rule.</p>
<p>When a workflow is executed, Snakemake tries to generate given <strong>target</strong> files. Target files can be specified via the command line. By executing</p>
<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> -np mapped_reads/A.bam</code></pre>
<p>in the working directory containing the Snakefile, we tell Snakemake to generate the target file <code>mapped_reads/A.bam</code>. Since we used the <code>-n</code> (or <code>--dryrun</code>) flag, Snakemake will only show the execution plan instead of actually perform the steps. The <code>-p</code> flag instructs Snakemake to also print the resulting shell command for illustation. To generate the target files, <strong>Snakemake applies the rules given in the Snakefile in a top-down way</strong>. The application of a rule to generate a set of output files is called <strong>job</strong>. For each input file of a job, Snakemake again (i.e. recursively) determines rules that can be applied to generate it. This yields a directed acyclic graph (DAG) of jobs where the edges represent dependencies. So far, we only have a single rule, and the DAG of jobs consists of a single node. Nevertheless, we can <strong>execute our workflow</strong> with</p>
<p>in the working directory containing the Snakefile, we tell Snakemake to generate the target file <code>mapped_reads/A.bam</code>. Since we used the <code>-n</code> (or <code>--dry-run</code>) flag, Snakemake will only show the execution plan instead of actually performing the steps. The <code>-p</code> flag instructs Snakemake to also print the resulting shell command for illustration. To generate the target files, <strong>Snakemake applies the rules given in the Snakefile in a top-down way</strong>. The application of a rule to generate a set of output files is called <strong>job</strong>. For each input file of a job, Snakemake again (i.e. recursively) determines rules that can be applied to generate it. This yields a directed acyclic graph (DAG) of jobs where the edges represent dependencies. So far, we only have a single rule, and the DAG of jobs consists of a single node. Nevertheless, we can <strong>execute our workflow</strong> with</p>
<pre class="sourceCode bash"><code class="sourceCode bash"><span class="kw">snakemake</span> mapped_reads/A.bam</code></pre>
<p>Note that, after completion of the above command, Snakemake will not try to create <code>mapped_reads/A.bam</code> again, because it is already present in the file system. Snakemake <strong>only re-runs jobs if one of the input files is newer than one of the output files or one of the input files will be updated by another job</strong>.</p>
<h2 id="step-2-generalizing-the-read-mapping-rule">Step 2: Generalizing the read mapping rule</h2>
......
......@@ -37,6 +37,7 @@ def snakemake(snakefile,
nodes=1,
local_cores=1,
resources=dict(),
default_resources=dict(),
config=dict(),
configfile=None,
config_args=None,
......@@ -141,6 +142,7 @@ def snakemake(snakefile,
nodes (int): the number of provided cluster nodes (ignored without cluster support) (default 1)
local_cores (int): the number of provided local cores if in cluster mode (ignored without cluster support) (default 1)
resources (dict): provided resources, a dictionary assigning integers to resource names, e.g. {gpu=1, io=5} (default {})
default_resources (dict): default values for resources not defined in rules (default {})
config (dict): override values for workflow config
workdir (str): path to working directory (default None)
targets (list): list of targets, e.g. rule or file names (default None)
......@@ -397,7 +399,8 @@ def snakemake(snakefile,
attempt=attempt,
default_remote_provider=_default_remote_provider,
default_remote_prefix=default_remote_prefix,
run_local=run_local)
run_local=run_local,
default_resources=default_resources)
success = True
workflow.include(snakefile,
overwrite_first_rule=True,
......@@ -419,6 +422,7 @@ def snakemake(snakefile,
nodes=nodes,
local_cores=local_cores,
resources=resources,
default_resources=default_resources,
dryrun=dryrun,
touch=touch,
printreason=printreason,
......@@ -566,12 +570,12 @@ def snakemake(snakefile,
return success
def parse_resources(args):
def parse_resources(resources_args, fallback=None):
"""Parse resources from args."""
resources = dict()
if args.resources is not None:
if resources_args is not None:
valid = re.compile("[a-zA-Z_]\w*$")
for res in args.resources:
for res in resources_args:
try:
res, val = res.split("=")
except ValueError:
......@@ -583,8 +587,11 @@ def parse_resources(args):
try:
val = int(val)
except ValueError:
raise ValueError(
"Resource definiton must contain an integer after the identifier.")
if fallback is not None:
val = fallback(val)
else:
raise ValueError(
"Resource definiton must contain an integer after the identifier.")
if res == "_cores":
raise ValueError(
"Resource _cores is already defined internally. Use a different name.")
......@@ -592,6 +599,17 @@ def parse_resources(args):
return resources
def parse_default_resources(resources_args):
"""Parse default resource definition args."""
def fallback(val):
def callable(wildcards, input, attempt, threads, rulename):
value = eval(val, {"input": input, "attempt": attempt, "threads": threads})
return value
return callable
return parse_resources(resources_args, fallback=fallback)
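# Illustrative note (not part of the patch): with this refactoring, plain
# integer definitions stay integers, while --default-resources values that are
# not integers fall back to callables that are evaluated per job, e.g.
#   parse_resources(["mem_mb=100", "gpu=1"])
#       -> {"mem_mb": 100, "gpu": 1}
#   parse_default_resources(["mem_mb=max(2*input.size, 1000)"])
#       -> {"mem_mb": <callable(wildcards, input, attempt, threads, rulename)>}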
def parse_config(args):
"""Parse config from args."""
parsers = [int, float, eval, str]
......@@ -700,10 +718,11 @@ def get_argument_parser(profile=None):
default=None,
help="Targets to build. May be rules or files.")
group_exec.add_argument("--dryrun", "-n",
group_exec.add_argument("--dry-run", "--dryrun", "-n",
dest="dryrun",
action="store_true",
help="Do not execute anything, and display what would be done. "
"If you have a very large workflow, use --dryrun --quiet to just "
"If you have a very large workflow, use --dry-run --quiet to just "
"print a summary of the DAG of jobs.")
group_exec.add_argument("--profile",
......@@ -762,6 +781,15 @@ def get_argument_parser(profile=None):
"use resources by defining the resource keyword, e.g. "
"resources: gpu=1. If now two rules require 1 of the resource "
"'gpu' they won't be run in parallel by the scheduler."))
group_exec.add_argument(
"--default-resources", "--default-res",
nargs="*",
metavar="NAME=INT",
help=("Define default values of resources for rules that do not define their own values. "
"In addition to plain integers, python expressions over inputsize are allowed (e.g. '2*input.size')."
"When specifying this without any arguments (--default-resources), it defines 'mem_mb=max(2*input.size, 1000)' "
"'disk_mb=max(2*input.size, 1000)', i.e., default disk and mem usage is twice the input file size but at least 1GB.")
)
group_exec.add_argument(
"--config", "-C",
nargs="*",
......@@ -968,7 +996,7 @@ def get_argument_parser(profile=None):
group_utils.add_argument(
"--delete-all-output",
action="store_true",
help="Remove all files generated by the workflow. Use together with --dryrun "
help="Remove all files generated by the workflow. Use together with --dry-run "
"to list files without actually deleting anything. Note that this will "
"not recurse into subworkflows. Write-protected files are not removed. "
"Nevertheless, use with care!"
......@@ -977,7 +1005,7 @@ def get_argument_parser(profile=None):
"--delete-temp-output",
action="store_true",
help="Remove all temporary files generated by the workflow. Use together "
"with --dryrun to list files without actually deleting anything. Note "
"with --dry-run to list files without actually deleting anything. Note "
"that this will not recurse into subworkflows."
)
group_utils.add_argument(
......@@ -1191,7 +1219,7 @@ def get_argument_parser(profile=None):
"submitted to the cluster with the given command, once all input "
"files for a particular job are present.\n"
"The submit command can be decorated to make it aware of certain "
"job properties (input, output, params, wildcards, log, threads "
"job properties (name, rulename, input, output, params, wildcards, log, threads "
"and dependencies (see the argument below)), e.g.:\n"
"$ snakemake --cluster 'qsub -pe threaded {threads}'.")),
cluster_mode_group.add_argument(
......@@ -1211,7 +1239,7 @@ def get_argument_parser(profile=None):
"submitted to the cluster with the given command, once all input "
"files for a particular job are present. ARGS can be used to "
"specify options of the underlying cluster system, "
"thereby using the job properties input, output, params, wildcards, log, "
"thereby using the job properties name, rulename, input, output, params, wildcards, log, "
"threads and dependencies, e.g.: "
"--drmaa ' -pe threaded {threads}'. Note that ARGS must be given in quotes and "
"with a leading whitespace.")
......@@ -1384,8 +1412,11 @@ def main(argv=None):
sys.exit(0)
try:
resources = parse_resources(args)
resources = parse_resources(args.resources)
config = parse_config(args)
if args.default_resources is not None and not args.default_resources:
args.default_resources = ["mem_mb=max(2*input.size, 1000)", "disk_mb=max(2*input.size, 1000)"]
default_resources = parse_default_resources(args.default_resources)
except ValueError as e:
print(e, file=sys.stderr)
print("", file=sys.stderr)
......@@ -1505,6 +1536,7 @@ def main(argv=None):
local_cores=args.local_cores,
nodes=args.cores,
resources=resources,
default_resources=default_resources,
config=config,
configfile=args.configfile,
config_args=args.config,
......