Commits on Source (4)
......@@ -4,6 +4,7 @@ jobs:
machine: true
environment:
- GCLOUD: /opt/google-cloud-sdk/bin/gcloud
- GSUTIL: /opt/google-cloud-sdk/bin/gsutil
- BOTO_CONFIG: /dev/null
steps:
- checkout
......@@ -54,10 +55,12 @@ jobs:
# otherwise init cloud
echo $GCLOUD_SERVICE_KEY | base64 --decode --ignore-garbage > ${HOME}/gcloud-service-key.json
sudo $GCLOUD components install kubectl
sudo $GCLOUD auth activate-service-account --key-file=${HOME}/gcloud-service-key.json
sudo $GCLOUD config set project snakemake-testing
sudo chown -R $USER:$USER /home/circleci/.config/gcloud
$GCLOUD auth activate-service-account --key-file=${HOME}/gcloud-service-key.json
$GCLOUD config set project snakemake-testing
- run:
name: Run tests
no_output_timeout: 30m
command: |
export GCLOUD_CLUSTER=t-`uuidgen`
export GOOGLE_APPLICATION_CREDENTIALS=${HOME}/gcloud-service-key.json
......
# Change Log
# [5.2.0] - 2018-06-28
## Changed
- Directory outputs have to be marked with `directory`. This ensures proper handling of timestamps and cleanup. This is a breaking change. Implemented by Rasmus Ågren.
- Fixed kubernetes tests, fixed kubernetes volume handling. Implemented by Andrew Schriefer.
- jinja2 and networkx are now optional dependencies when installing via pip.
- When conda or singularity directives are used and the corresponding CLI flags are not specified, the user is notified at the beginning of the log output.
- Fixed numerous small bugs and papercuts and extended documentation.
# [5.1.5] - 2018-06-24
## Changed
- fixed missing version info in docker image.
......
snakemake (5.2.1-1) UNRELEASED; urgency=medium
* Team upload.
* New upstream version
- Fix compatibility with python 3.7 (Closes: #904350)
* Remove 0012-skip_test_needing_networ_connection.patch
upstream applied.
-- Dylan Aïssi <bob.dybian@gmail.com> Fri, 27 Jul 2018 19:30:16 +0200
snakemake (5.1.5-1) unstable; urgency=medium
* Team upload.
......
......@@ -10,7 +10,7 @@ sphinx proper, as sphinx.ext.napoleon.
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -37,7 +37,7 @@ extensions = [
@@ -37,7 +37,7 @@
'sphinx.ext.autodoc',
'sphinx.ext.mathjax',
'sphinx.ext.viewcode',
......
......@@ -8,7 +8,7 @@ Subject: Compat fix
--- a/tests/test_symlink_time_handling/Snakefile
+++ b/tests/test_symlink_time_handling/Snakefile
@@ -42,7 +42,7 @@ if not os.path.exists("input_file"):
@@ -42,7 +42,7 @@
shell("ln -s input_link output_link")
shell("touch -h -t {} output_link".format(timestr(2)))
......@@ -17,7 +17,7 @@ Subject: Compat fix
rule main:
output: "time_diff.txt"
@@ -57,7 +57,7 @@ rule main:
@@ -57,7 +57,7 @@
os.stat("input_file", follow_symlinks=False).st_mtime
) / (60*60) )
# I expect the result "4 1"
......
......@@ -10,7 +10,7 @@ Use debian's mathjax package
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -41,6 +41,9 @@ extensions = [
@@ -41,6 +41,9 @@
'sphinxarg.ext'
]
......
......@@ -4,7 +4,7 @@ Last-Update: Wed, 06 Dec 2017 22:10:17 +0100
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -37,8 +37,7 @@ extensions = [
@@ -37,8 +37,7 @@
'sphinx.ext.autodoc',
'sphinx.ext.mathjax',
'sphinx.ext.viewcode',
......
......@@ -57,7 +57,7 @@ Description: Avoid privacy breach
The Snakemake workflow management system is a tool to create **reproducible and scalable** data analyses.
--- a/docs/project_info/citations.rst
+++ b/docs/project_info/citations.rst
@@ -38,7 +38,7 @@ Project Pages
@@ -38,7 +38,7 @@
If you publish a Snakemake workflow, consider to add this badge to your project page:
......
Origin: https://bitbucket.org/snakemake/snakemake/commits/4741705d0b07e1131ed0cc940b53a802f49ead42
Description: Skip tests on missing inet connection or if not in CI.
Author: Johannes Köster <johannes.koester@tu-dortmund.de>
Last-Update: Mon Jul 9 11:30:23 2018 +0200
Bug-Upstream: https://bitbucket.org/snakemake/snakemake/issues/902/how-to-reliably-exclude-tests-requiring
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -13,6 +13,7 @@ import hashlib
import urllib
from shutil import rmtree, which
from shlex import quote
+import pytest
from snakemake import snakemake
from snakemake.shell import shell
@@ -43,6 +44,18 @@ def is_connected():
return False
+def is_ci():
+ return "CI" in os.environ
+
+
+def has_gcloud_service_key():
+ return "GCLOUD_SERVICE_KEY" in os.environ
+
+
+def has_gcloud_cluster():
+ return "GCLOUD_CLUSTER" in os.environ
+
+
def copy(src, dst):
if os.path.isdir(src):
shutil.copytree(src, os.path.join(dst, os.path.basename(src)))
@@ -52,7 +65,6 @@ def copy(src, dst):
def run(path,
shouldfail=False,
- needs_connection=False,
snakefile="Snakefile",
subpath=None,
check_md5=True, cores=3, **params):
@@ -61,12 +73,6 @@ def run(path,
There must be a Snakefile in the path and a subdirectory named
expected-results.
"""
-
- if needs_connection and not is_connected():
- print("Skipping test because of missing internet connection",
- file=sys.stderr)
- return False
-
results_dir = join(path, 'expected-results')
snakefile = join(path, snakefile)
assert os.path.exists(snakefile)
@@ -254,8 +260,9 @@ def test_persistent_dict():
pass
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_url_include():
- run(dpath("test_url_include"), needs_connection=True)
+ run(dpath("test_url_include"))
def test_touch():
@@ -444,6 +451,7 @@ def test_spaces_in_fnames():
# pass
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_remote_ncbi_simple():
try:
import Bio
@@ -454,6 +462,7 @@ def test_remote_ncbi_simple():
except ImportError:
pass
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_remote_ncbi():
try:
import Bio
@@ -465,6 +474,7 @@ def test_remote_ncbi():
pass
+@pytest.mark.skipif(not is_ci(), reason="not in CI")
def test_remote_irods():
if os.environ.get("CI") == "true":
run(dpath("test_remote_irods"))
@@ -569,6 +579,8 @@ def test_run_namedlist():
run(dpath("test_run_namedlist"))
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
+@pytest.mark.skipif(not is_ci(), reason="not in CI")
def test_remote_gs():
if not "CI" in os.environ:
run(dpath("test_remote_gs"))
@@ -576,13 +588,17 @@ def test_remote_gs():
print("skipping test_remove_gs in CI")
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_remote_log():
run(dpath("test_remote_log"), shouldfail=True)
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_remote_http():
run(dpath("test_remote_http"))
+
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_remote_http_cluster():
run(dpath("test_remote_http"), cluster=os.path.abspath(dpath("test14/qsub")))
@@ -590,6 +606,7 @@ def test_profile():
run(dpath("test_profile"))
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_singularity():
run(dpath("test_singularity"), use_singularity=True)
@@ -613,28 +630,52 @@ def test_archive():
def test_log_input():
run(dpath("test_log_input"))
-
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
+@pytest.mark.skipif(not is_ci(), reason="no in CI")
+@pytest.mark.skipif(not has_gcloud_service_key(), reason="GCLOUD_SERVICE_KEY undefined")
+@pytest.mark.skipif(not has_gcloud_cluster(), reason="GCLOUD_CLUSTER undefined")
def test_gcloud():
- if "CI" in os.environ and "GCLOUD_SERVICE_KEY" in os.environ:
- cluster = os.environ["GCLOUD_CLUSTER"]
- try:
- shell("""
- sudo $GCLOUD container clusters create {cluster} --num-nodes 3 --scopes storage-rw --zone us-central1-a --machine-type f1-micro
- sudo $GCLOUD container clusters get-credentials {cluster} --zone us-central1-a
- """)
- run(dpath("test_kubernetes"))
- run(dpath("test_kubernetes"), use_conda=True)
- run(dpath("test_kubernetes"), use_singularity=True)
- run(dpath("test_kubernetes"), use_singularity=True, use_conda=True)
- finally:
- shell("sudo $GCLOUD container clusters delete {cluster} --zone us-central1-a --quiet")
- print("Skipping google cloud test")
+ cluster = os.environ["GCLOUD_CLUSTER"]
+ bucket_name = 'snakemake-testing-{}'.format(cluster)
+
+ def run_kubernetes(**kwargs):
+ run(dpath("test_kubernetes"),
+ kubernetes="default",
+ default_remote_provider="GS",
+ default_remote_prefix=bucket_name,
+ no_tmpdir=True,
+ **kwargs)
+ def reset():
+ shell('$GSUTIL rm -r gs://{}/*'.format(bucket_name))
+
+ try:
+ shell("""
+ $GCLOUD container clusters create {cluster} --num-nodes 3 --scopes storage-rw --zone us-central1-a --machine-type f1-micro
+ $GCLOUD container clusters get-credentials {cluster} --zone us-central1-a
+ $GSUTIL mb gs://{bucket_name}
+ """)
+ run_kubernetes()
+ reset()
+ run_kubernetes(use_conda=True)
+ reset()
+ run_kubernetes(use_singularity=True)
+ reset()
+ run_kubernetes(use_singularity=True, use_conda=True)
+ reset()
+ except:
+ shell("for p in `kubectl get pods | grep ^snakejob- | cut -f 1 -d ' '`; do kubectl logs $p; done")
+ finally:
+ shell("""
+ $GCLOUD container clusters delete {cluster} --zone us-central1-a --quiet
+ $GSUTIL rm -r gs://{bucket_name}
+ """)
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_cwl():
run(dpath("test_cwl"))
-
+@pytest.mark.skipif(not is_connected(), reason="no internet connection")
def test_cwl_singularity():
run(dpath("test_cwl"), use_singularity=True)
......@@ -9,4 +9,3 @@
0009-skip-test-without-google-cloud-sdk.patch
# 0010-skip-test-without-rmarkdown.patch
0011-fix-privacy-breach.patch
0012-skip_test_needing_networ_connection.patch
......@@ -50,7 +50,6 @@ In alphabetical order
- Mattias Franberg
- Matt Shirley
- Paul Moore
- percyfal
- Per Unneberg
- Ryan C. Thompson
- Ryan Dale
......
......@@ -80,6 +80,30 @@ This entails the pipefail option, which reports errors from within a pipe to out
to your shell command in the problematic rule.
I don't want Snakemake to detect an error if my shell command exits with an exitcode > 1. What can I do?
---------------------------------------------------------------------------------------------------------
Sometimes, tools encode information in exit codes greater than 1. By default, Snakemake treats any exit code > 0 as an error, so such special cases have to be handled by yourself. For example, you can write
.. code-block:: python
    shell:
        """
        set +e
        somecommand ...
        exitcode=$?
        if [ $exitcode -eq 1 ]
        then
            exit 1
        else
            exit 0
        fi
        """
This way, Snakemake only treats exit code 1 as an error, and thinks that everything else is fine.
Note that such tools are an excellent use case for contributing a `wrapper <https://snakemake-wrappers.readthedocs.io>`_.
.. _glob-wildcards:
How do I run my rule on all files of a certain directory?
......@@ -387,6 +411,26 @@ As a solution, you can put the `--config` at the end of your invocation, or prep
$ snakemake mytarget --config foo=bar
How do I enforce config values given at the command line to be interpreted as strings?
--------------------------------------------------------------------------------------
When passing config values like this
.. code-block:: console
$ snakemake --config version=2018_1
Snakemake will first try to interpret the given value as a number.
Only if that fails will it interpret the value as a string.
Here, interpretation as a number does not fail, because the underscore `_` is read as a thousands separator.
To ensure that the value is interpreted as a string, you have to pass it in quotes.
Since bash would otherwise strip those quotes, you also have to wrap the entire entry in quotes, e.g.:
.. code-block:: console
$ snakemake --config 'version="2018_1"'
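In the workflow, the quoted value then arrives as a string. A minimal, hypothetical Snakefile snippet (target names are made up for illustration):

.. code-block:: python

    # Hypothetical example: with --config 'version="2018_1"' the value below is
    # the string "2018_1"; without the inner quotes it would be read as a number
    # (the underscore acting as a thousands separator).
    rule all:
        input:
            expand("results/{v}/report.txt", v=config["version"])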
How do I make my rule fail if an output file is empty?
------------------------------------------------------
......
......@@ -58,17 +58,6 @@ Expand still works as expected, just wrap the expansion:
    input:
        S3.remote(expand("bucket-name/{letter}-2.txt", letter=["A", "B", "C"]))
It is possible to use S3-compatible storage by specifying a different endpoint address as the `host` kwarg in the provider, as the kwargs used in instantiating the provider are passed in to `boto <https://boto.readthedocs.org/en/latest/ref/s3.html#boto.s3.connection.S3Connection>`_:
.. code-block:: python
    from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
    S3 = S3RemoteProvider(access_key_id="MYACCESSKEY", secret_access_key="MYSECRET", host="mystorage.example.com")

    rule all:
        input:
            S3.remote("bucket-name/file.txt")
Only remote files needed to satisfy the DAG build are downloaded for the workflow. By default, remote files are downloaded prior to rule execution and are removed locally as soon as no rules depend on them. Remote files can be explicitly kept by setting the ``keep_local=True`` keyword argument:
.. code-block:: python
......
......@@ -527,6 +527,22 @@ Further, an output file marked as ``temp`` is deleted after all rules that use i
shell:
"somecommand {input} {output}"
Directories as outputs
----------------------
There are situations where it can be convenient to have directories, rather than files, as outputs of a rule. For example, some tools generate different output files based on which settings they are run with. Rather than covering all these cases with conditional statements in the Snakemake rule, you can let the rule output a directory that contains all the output files regardless of settings. Another use case could be when the number of outputs is large or unknown, say one file per identified species in a metagenomics sample or one file per cluster from a clustering algorithm. If all downstream rules rely on the whole sets of outputs, rather than on the individual species/clusters, then having a directory as an output can be a faster and easier solution compared to using the ``dynamic`` keyword.
As of version 5.2.0, directories as outputs have to be explicitly marked with ``directory``. This is primarily for safety reasons; since all outputs are deleted before a job is executed, we don't want to risk deleting important directories if the user makes some mistake. Marking the output as ``directory`` makes the intent clear, and the output can be safely removed. Another reason comes down to how modification times for directories work. The modification time of a directory changes when a file or a subdirectory is added, removed or renamed. This can easily happen in not-quite-intended ways, such as when Apple macOS or MS Windows add ``.DS_Store`` or ``thumbs.db`` files to store parameters for how the directory contents should be displayed. When the ``directory`` flag is used, a hidden file called ``.snakemake_timestamp`` is created in the output directory, and the modification time of that file is used when determining whether the rule output is up to date or whether it needs to be rerun.
.. code-block:: python
    rule NAME:
        input:
            "path/to/inputfile"
        output:
            directory("path/to/outputdir")
        shell:
            "somecommand {input} {output}"
Ignoring timestamps
-------------------
......@@ -552,7 +568,7 @@ Shadow rules
Shadow rules result in each execution of the rule being run in an isolated temporary directory. This "shadow" directory contains symlinks to files and directories in the current workdir. This is useful for running programs that generate lots of unused files which you don't want to manually clean up in your snakemake workflow. It can also be useful if you want to keep your workdir clean while the program executes, or to simplify your workflow by not having to worry about unique filenames for all outputs of all rules.
By setting ``shadow: "shallow"``, the top level files and directories are symlinked, so that any relative paths in a subdirectory will be real paths in the filesystem. The setting ``shadow: "full"`` fully shadows the entire subdirectory structure of the current workdir. Once the rule successfully executes, the output file will be moved if necessary to the real path as indicated by ``output``.
By setting ``shadow: "shallow"``, the top level files and directories are symlinked, so that any relative paths in a subdirectory will be real paths in the filesystem. The setting ``shadow: "full"`` fully shadows the entire subdirectory structure of the current workdir. The setting ``shadow: "minimal"`` only symlinks the inputs to the rule. Once the rule successfully executes, the output file will be moved if necessary to the real path as indicated by ``output``.
Shadow directories are stored one per rule execution in ``.snakemake/shadow/``, and are cleared on subsequent snakemake invocations unless the ``--keep-shadow`` command line argument is used.
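For illustration, a minimal sketch of a rule using ``shadow: "minimal"`` (rule, file, and command names are made up):

.. code-block:: python

    # Only the declared input is symlinked into the shadow directory; any scratch
    # files the command writes stay in the shadow directory and are discarded,
    # while the declared output is moved back to the real workdir on success.
    rule summarize:
        input:
            "data/sample.txt"
        output:
            "results/sample.summary.txt"
        shadow: "minimal"
        shell:
            "somecommand {input} > {output}"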
......@@ -622,7 +638,7 @@ Dynamic Files
Snakemake provides experimental support for dynamic files.
Dynamic files can be used whenever one has a rule for which the number of output files is not known before the rule is executed.
This is useful for example with cetain clustering algorithms:
This is useful for example with certain clustering algorithms:
.. code-block:: python
......@@ -942,8 +958,9 @@ Defining groups for execution
-----------------------------
From Snakemake 5.0 on, it is possible to assign rules to groups.
Such groups will be executed together in cluster or cloud mode, as a so-called **group job**, i.e., all jobs of a particular group will be submitted at once, to the same computing node. By this, queueing and execution time can be
safed, in particular if one or several short-running rules are involved.
Such groups will be executed together in **cluster** or **cloud mode**, as a so-called **group job**, i.e., all jobs of a particular group will be submitted at once, to the same computing node.
In this way, queueing and execution time can be saved, in particular if one or several short-running rules are involved.
When executing locally, group definitions are ignored.
Groups can be defined via the ``group`` keyword, e.g.,
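A minimal sketch with illustrative rule and file names:

.. code-block:: python

    # Both rules are assigned to the group "mygroup"; in cluster or cloud mode
    # their jobs for a given sample are submitted together as one group job.
    rule a:
        output: "a/{sample}.txt"
        group: "mygroup"
        shell: "touch {output}"

    rule b:
        input: "a/{sample}.txt"
        output: "b/{sample}.txt"
        group: "mygroup"
        shell: "cp {input} {output}"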
......@@ -1016,6 +1033,7 @@ From Snakemake 5.0 on, it is possible to mark output files as pipes, via the ``p
"grep {wildcards.i} < {input} > {output}"
If an output file is marked as a pipe, then Snakemake will first create a `named pipe <https://en.wikipedia.org/wiki/Named_pipe>`_ with the given name and then execute the creating job simultaneously with the consuming job, inside a **group job** (see above).
This works in all execution modes: local, cluster, and cloud.
Naturally, a pipe output may only have a single consumer.
It is possible to combine explicit group definition as above with pipe outputs.
Thereby, pipe jobs can live within, or (automatically) extend existing groups.
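A minimal producer/consumer sketch (rule and file names are illustrative):

.. code-block:: python

    # The pipe() output makes the two rules a producer/consumer pair that
    # Snakemake runs simultaneously inside one group job.
    rule produce:
        input: "data.txt"
        output: pipe("filtered/{i}.txt")
        shell: "grep {wildcards.i} < {input} > {output}"

    rule consume:
        input: "filtered/{i}.txt"
        output: "sorted/{i}.txt"
        shell: "sort {input} > {output}"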
......
......@@ -46,7 +46,8 @@ setup(
package_data={'': ['*.css', '*.sh', '*.html']},
install_requires=['wrapt', 'requests', 'ratelimiter', 'pyyaml',
'configargparse', 'appdirs', 'datrie', 'jsonschema',
'docutils', 'jinja2', 'networkx'],
'docutils'],
extras_require={"reports": ['jinja2', 'networkx']},
classifiers=
["Development Status :: 5 - Production/Stable", "Environment :: Console",
"Intended Audience :: Science/Research",
......
......@@ -545,12 +545,15 @@ def snakemake(snakefile,
# in such a case, snakemake shall stop scheduling and quit with error 1
success = False
except (Exception, BaseException) as ex:
print_exception(ex, workflow.linemaps)
if "workflow" in locals():
print_exception(ex, workflow.linemaps)
else:
print_exception(ex, dict())
success = False
if workdir:
os.chdir(olddir)
if workflow.persistence:
if "workflow" in locals() and workflow.persistence:
workflow.persistence.unlock()
if not keep_logger:
logger.cleanup()
......
......@@ -23,9 +23,9 @@ def get_keywords():
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = " (HEAD -> master, tag: v5.1.5)"
git_full = "b3233c876acb5df7cd9596611568767ce9dd2a30"
git_date = "2018-06-24 14:34:21 +0200"
git_refnames = " (tag: v5.2.1)"
git_full = "31a6edfb1e2110c1e197d1f75367133c13ff3eb6"
git_date = "2018-07-23 10:35:27 +0200"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
......
......@@ -3,6 +3,7 @@ import subprocess
import tempfile
from urllib.request import urlopen
from urllib.parse import urlparse
from urllib.error import URLError
import hashlib
import shutil
from distutils.version import StrictVersion
......@@ -19,7 +20,10 @@ from snakemake import singularity
def content(env_file):
if urlparse(env_file).scheme:
return urlopen(env_file).read()
try:
return urlopen(env_file).read()
except URLError as e:
raise WorkflowError("Failed to open environment file {}:".format(env_file), e)
else:
if not os.path.exists(env_file):
raise WorkflowError("Conda env file does not "
......@@ -253,15 +257,24 @@ def shellcmd(env_path):
def check_conda(singularity_img=None):
def get_cmd(cmd, singularity_img=None):
def get_cmd(cmd):
if singularity_img:
return singularity.shellcmd(self.singularity_img.path, cmd)
return singularity.shellcmd(singularity_img.path, cmd)
return cmd
if subprocess.check_output(get_cmd("which conda"),
shell=True,
stderr=subprocess.STDOUT) is None:
raise CreateCondaEnvironmentException("The 'conda' command is not available in $PATH.")
try:
subprocess.check_output(get_cmd("which conda"),
shell=True,
stderr=subprocess.STDOUT)
except subprocess.CalledProcessError:
if singularity_img:
raise CreateCondaEnvironmentException("The 'conda' command is not "
"available inside "
"your singularity container "
"image.")
else:
raise CreateCondaEnvironmentException("The 'conda' command is not "
"available.")
try:
version = subprocess.check_output(get_cmd("conda --version"),
shell=True,
......
......@@ -22,9 +22,9 @@ from snakemake.jobs import Job, Reason, GroupJob
from snakemake.exceptions import RuleException, MissingInputException
from snakemake.exceptions import MissingRuleException, AmbiguousRuleException
from snakemake.exceptions import CyclicGraphException, MissingOutputException
from snakemake.exceptions import IncompleteFilesException
from snakemake.exceptions import IncompleteFilesException, ImproperOutputException
from snakemake.exceptions import PeriodicWildcardError, WildcardError
from snakemake.exceptions import RemoteFileException, WorkflowError
from snakemake.exceptions import RemoteFileException, WorkflowError, ChildIOException
from snakemake.exceptions import UnexpectedOutputException, InputFunctionException
from snakemake.logging import logger
from snakemake.common import DYNAMIC_FILL
......@@ -130,6 +130,21 @@ class DAG:
self.set_until_jobs()
self.delete_omitfrom_jobs()
self.update_jobids()
# Check if there are files/dirs that are children of other outputs.
allfiles = {}
for job in self.jobs:
# This is to account also for targets of symlinks
allfiles.update({f(x):"input" for x in job.input for f in (os.path.abspath, os.path.realpath)})
allfiles.update({f(x):"output" for x in job.output for f in (os.path.abspath, os.path.realpath)})
sortedfiles = sorted(allfiles.keys())
for i in range(len(sortedfiles)-1):
if allfiles[sortedfiles[i]] == "output":
if os.path.commonpath([sortedfiles[i]]) == os.path.commonpath([sortedfiles[i], sortedfiles[i+1]]):
raise ChildIOException(parent = sortedfiles[i], child = sortedfiles[i+1])
# check if remaining jobs are valid
for i, job in enumerate(self.jobs):
job.is_valid()
......@@ -357,18 +372,23 @@ class DAG:
"filesystem latency. If that is the case, consider to increase the "
"wait time with --latency-wait.", rule=job.rule)
# Ensure that outputs are of the correct type (those flagged with directory()
# are directories and not files and vice versa).
for f in expanded_output:
if (f.is_directory and not os.path.isdir(f)) or (os.path.isdir(f) and not f.is_directory):
raise ImproperOutputException(job.rule, [f])
#It is possible, due to archive expansion or cluster clock skew, that
#the files appear older than the input. But we know they must be new,
#so touch them to update timestamps. This also serves to touch outputs
#when using the --touch flag.
#Note that if the input files somehow have a future date then this will
#not currently be spotted and the job will always be re-run.
#Also, don't touch directories, as we can't guarantee they were removed.
if not no_touch:
for f in expanded_output:
#This will neither create missing files nor touch directories
if os.path.isfile(f):
f.touch()
# This won't create normal files if missing, but will create
# the flag file for directories.
f.touch()
def unshadow_output(self, job, only_log=False):
""" Move files from shadow directory to real output paths. """
......@@ -444,7 +464,6 @@ class DAG:
is_temp = lambda f: is_flagged(f, "temp")
# handle temp input
needed = lambda job_, f: any(
f in files for j, files in self.depending[job_].items()
if not self.finished(j) and self.needrun(j) and j != job)
......@@ -456,8 +475,9 @@ class DAG:
yield from filterfalse(partial(needed, job_), tempfiles & files)
# temp output
if job not in self.targetjobs and not job.dynamic_output:
tempfiles = (f for f in job.expanded_output if is_temp(f))
if not job.dynamic_output:
tempfiles = (f for f in job.expanded_output
if is_temp(f) and f not in self.targetfiles)
yield from filterfalse(partial(needed, job), tempfiles)
for f in unneeded_files():
......@@ -937,8 +957,9 @@ class DAG:
assert targetfile is not None
return self.job_cache[key]
wildcards_dict = rule.get_wildcards(targetfile)
job = Job(rule, self, wildcards_dict=wildcards_dict, format_wildcards=format_wildcards)
for f in job.output:
job = Job(rule, self, wildcards_dict=wildcards_dict,
format_wildcards=format_wildcards, targetfile=targetfile)
for f in job.products:
self.job_cache[(rule, f)] = job
return job
......
......@@ -134,7 +134,7 @@ class WildcardError(WorkflowError):
class RuleException(Exception):
"""
Base class for exception occuring withing the
Base class for exception occuring within the
execution or definition of rules.
"""
......@@ -181,6 +181,10 @@ class InputFunctionException(WorkflowError):
"{}={}".format(name, value) for name, value in wildcards.items())
super().__init__(msg, lineno=lineno, snakefile=snakefile, rule=rule)
class ChildIOException(WorkflowError):
def __init__(self, parent=None, child=None, wildcards=None, lineno=None, snakefile=None, rule=None):
msg = "File/directory is a child to another output:\n" + "{}\n{}".format(parent, child)
super().__init__(msg, lineno=lineno, snakefile=snakefile, rule=rule)
class MissingOutputException(RuleException):
pass
......@@ -217,6 +221,12 @@ class ProtectedOutputException(IOException):
lineno=lineno,
snakefile=snakefile)
class ImproperOutputException(IOException):
def __init__(self, rule, files, include=None, lineno=None, snakefile=None):
super().__init__("Outputs of incorrect type (directories when expecting files or vice versa). "
"Output directories must be flagged with directory().", rule, files, include,
lineno=lineno,
snakefile=snakefile)
class UnexpectedOutputException(IOException):
def __init__(self, rule, files, include=None, lineno=None, snakefile=None):
......