Commits on Source (5)
......@@ -28,3 +28,4 @@ cwl/*-workflow
.idea/
__pycache__
.coverage
.pytest_cache
......@@ -22,8 +22,9 @@ install:
- df -h
- export PATH=~/install/bcbio-vm/anaconda/bin:$PATH
- conda install --yes nomkl
- travis_wait 30 conda install --yes -c conda-forge -c bioconda bcbio-nextgen-vm bcbio-nextgen
- travis_wait conda install --yes -c conda-forge -c bioconda cwltool toil rabix-bunny
- conda install --yes -c conda-forge -c bioconda bcbio-nextgen
- conda install --yes -c conda-forge -c bioconda bcbio-nextgen-vm
- conda install --yes -c conda-forge -c bioconda cwltool toil rabix-bunny
# Clean up space with external tools we don't need for tests
- conda clean --yes --tarballs --index-cache
- conda remove --yes --force qt
......@@ -43,14 +44,14 @@ script:
# Update to latest bcbio-nextgen code within the container
- bcbio_vm.py devel setup_install -i quay.io/bcbio/bcbio-vc
# -- Standard bcbio variant tests
- docker run -v `pwd`:`pwd` quay.io/bcbio/bcbio-vc bash -c "cd `pwd` && /usr/local/share/bcbio-nextgen/anaconda/bin/py.test tests/unit --cov=bcbio"
- py.test tests/bcbio_vm -v -m docker_multicore
- docker run -v `pwd`:`pwd` quay.io/bcbio/bcbio-vc bash -c "cd `pwd` && /usr/local/share/bcbio-nextgen/anaconda/bin/py.test -p no:cacheprovider tests/unit --cov=bcbio"
- py.test -p no:cacheprovider tests/bcbio_vm -v -m docker_multicore
# -- bcbio variant CWL tests
- py.test tests/bcbio_vm -v -s -m cwl_docker_joint
- py.test tests/bcbio_vm -v -s -m cwl_docker_somatic
- py.test -p no:cacheprovider tests/bcbio_vm -v -s -m cwl_docker_joint
- py.test -p no:cacheprovider tests/bcbio_vm -v -s -m cwl_docker_somatic
# -- platform integration
- sudo mkdir -p /etc/pki/tls/certs && sudo ln -s /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt
- py.test tests/bcbio_vm -v -s -m cwl_arvados
- py.test -p no:cacheprovider tests/bcbio_vm -v -s -m cwl_arvados
# -- Cleanup variant docker image
- docker ps -a -q | xargs --no-run-if-empty docker rm
- docker rmi -f quay.io/bcbio/bcbio-vc
......@@ -61,7 +62,7 @@ script:
- docker images
- df -h
- bcbio_vm.py devel setup_install -i quay.io/bcbio/bcbio-rnaseq
- py.test tests/bcbio_vm -v -s -m cwl_docker_rnaseq
- py.test -p no:cacheprovider tests/bcbio_vm -v -s -m cwl_docker_rnaseq
# -- Cleanup RNA-seq docker image
- docker ps -a -q | xargs --no-run-if-empty docker rm
- docker rmi -f quay.io/bcbio/bcbio-rnaseq
......
......@@ -12,9 +12,9 @@ RUN apt-get update && \
# bcbio-nextgen installation
mkdir -p /tmp/bcbio-nextgen-install && cd /tmp/bcbio-nextgen-install && \
wget --no-check-certificate \
https://raw.github.com/chapmanb/bcbio-nextgen/master/scripts/bcbio_nextgen_install.py && \
https://raw.github.com/bcbio/bcbio-nextgen/master/scripts/bcbio_nextgen_install.py && \
python bcbio_nextgen_install.py /usr/local/share/bcbio-nextgen \
--isolate --nodata -u development --tooldir=/usr/local && \
--isolate --minimize-disk --nodata -u development && \
git config --global url.https://github.com/.insteadOf git://github.com/ && \
/usr/local/share/bcbio-nextgen/anaconda/bin/conda install -y nomkl && \
/usr/local/share/bcbio-nextgen/anaconda/bin/bcbio_nextgen.py upgrade --isolate --tooldir=/usr/local --tools && \
......@@ -28,7 +28,7 @@ RUN apt-get update && \
# add user run script
wget --no-check-certificate -O createsetuser \
https://raw.github.com/chapmanb/bcbio-nextgen-vm/master/scripts/createsetuser && \
https://raw.github.com/bcbio/bcbio-nextgen-vm/master/scripts/createsetuser && \
chmod a+x createsetuser && mv createsetuser /sbin && \
# clean filesystem
......
## 1.0.9 (10 April 2018)
- Use smoove for lumpy variant calling and genotyping, replacing custom lumpyexpress
implementation: [validation](https://github.com/bcbio/bcbio_validations/tree/master/NA24385_sv#smoove-validation)
- Generalize exclusion of regions during variant calling with the new
`exclude_regions` target. Includes the previously available LCR and high depth
regions, in addition to removal of polyX and alternative contigs (see the
example configuration at the end of these release notes).
- Normalize allele frequency calculation and filtering for Strelka2 and MuTect2.
Thanks to Vlad Saveliev.
- CNVkit: enable specification of pre-built reference background cnn with
`background: cnv_reference`.
- CNVkit: handle projects with mixed CNVkit and non-CNVkit usage. Thanks to Luca
Beltrame.
- Improved Atropos trimming: better use of multicore parallelization in variant
and RNA-seq pipelines.
- Add support for polyG and polyX trimming in variant calling, for NovaSeq 3' end
cleanup and to generally avoid low-complexity reads.
- Structural variant: use SURVIVOR for validation comparisons.
- RNA-seq variant calling: use multiple cores for VarDict.
- Support miRge2.0 for alternative small RNA annotation. Users should
install the tool manually until it is available through bioconda.
- Add bamCoverage to the ChIP-seq pipeline to generate bigWig coverage files.
- GATK4: Correctly use GATK4 GatherVcfs when `tools_off: [gatk4]` is specified for
variant calling. Thanks to Luca Beltrame.
- variant: Default to `mark_duplicates: false` if alignment is turned off
(`aligner: false`).
- variant: Fix race condition when preparing BED files for coverage and
sv_regions. Thanks to Tristan Lubinski.
- Fix `noalt_calling` to correctly avoid parallelizing on non-standard
chromosomes without a variant regions file.
- Fix broken `kraken` command. Thanks to @choosehappy.
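As a rough sketch of how these options fit together, here is a hypothetical `algorithm`
section of a sample configuration. Aside from `exclude_regions`, `altcontigs`, `polyx`
adapters, `trim_reads: fastp` and `background: cnv_reference`, which appear in these notes
or in the accompanying code changes, the keyword values below are assumptions; check the
configuration documentation for the authoritative list.

```yaml
# Hypothetical configuration fragment -- values are illustrative only
algorithm:
  trim_reads: fastp                  # alternative trimmer alongside atropos
  adapters: [polyx]                  # polyG/polyX 3' end cleanup for NovaSeq reads
  exclude_regions: [lcr, highdepth, polyx, altcontigs]   # generalized exclusion targets (names assumed)
  background:
    cnv_reference: /path/to/background.cnn   # pre-built CNVkit reference background
```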
## 1.0.8 (5 February 2018)
- GATK4 is the new default GATK release used in bcbio when running HaplotypeCaller or
......
......@@ -10,8 +10,8 @@ provides a shared community resource that handles the data processing component
of sequencing analysis, providing researchers with more time to focus on the
downstream biology.
.. image:: https://travis-ci.org/chapmanb/bcbio-nextgen.png
:target: https://travis-ci.org/chapmanb/bcbio-nextgen
.. image:: https://travis-ci.org/bcbio/bcbio-nextgen.png
:target: https://travis-ci.org/bcbio/bcbio-nextgen
Features
--------
......@@ -55,7 +55,7 @@ Quick start
1. `Install`_ ``bcbio-nextgen`` with all tool dependencies and data files::
wget https://raw.github.com/chapmanb/bcbio-nextgen/master/scripts/bcbio_nextgen_install.py
wget https://raw.github.com/bcbio/bcbio-nextgen/master/scripts/bcbio_nextgen_install.py
python bcbio_nextgen_install.py /usr/local/share/bcbio --tooldir=/usr/local \
--genomes GRCh37 --aligners bwa --aligners bowtie2
......@@ -74,8 +74,8 @@ Quick start
cd project1/work
bcbio_nextgen.py ../config/project1.yaml -n 8
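For reference, ``project1.yaml`` is the sample description file referenced above; a minimal
hypothetical sketch following the linked ``bcbio_sample.yaml`` template (sample name, input
files and algorithm choices are placeholders)::

    upload:
      dir: ../final
    details:
      - description: Sample1
        files: [sample1_1.fq.gz, sample1_2.fq.gz]
        analysis: variant2
        genome_build: GRCh37
        algorithm:
          aligner: bwa
          variantcaller: gatk-haplotype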
.. _system configuration file: https://github.com/chapmanb/bcbio-nextgen/blob/master/config/bcbio_system.yaml
.. _sample description file: https://github.com/chapmanb/bcbio-nextgen/blob/master/config/bcbio_sample.yaml
.. _system configuration file: https://github.com/bcbio/bcbio-nextgen/blob/master/config/bcbio_system.yaml
.. _sample description file: https://github.com/bcbio/bcbio-nextgen/blob/master/config/bcbio_sample.yaml
.. _Automatically create a processing description: https://bcbio-nextgen.readthedocs.org/en/latest/contents/configuration.html#automated-sample-configuration
.. _Install: https://bcbio-nextgen.readthedocs.org/en/latest/contents/installation.html#automated
.. _configuration options: https://bcbio-nextgen.readthedocs.org/en/latest/contents/configuration.html
......@@ -88,7 +88,7 @@ See the `full documentation`_ and `longer analysis-based articles
and discussion on the `biovalidation mailing list`_.
.. _full documentation: https://bcbio-nextgen.readthedocs.org
.. _GitHub: https://github.com/chapmanb/bcbio-nextgen/issues
.. _GitHub: https://github.com/bcbio/bcbio-nextgen/issues
.. _biovalidation mailing list: https://groups.google.com/d/forum/biovalidation
Contributors
......
......@@ -266,7 +266,7 @@ def _check_sample(in_bam, rgnames):
if len(msgs) > 0:
raise ValueError("Problems with pre-aligned input BAM file: %s\n" % (in_bam)
+ "\n".join(msgs) +
"\nSetting `bam_clean: picard` or `bam_clean: fixrg`\n"
"\nSetting `bam_clean: fixrg`\n"
"in the configuration can often fix this issue.")
if warnings:
print("*** Potential problems in input BAM compared to reference:\n%s\n" %
......@@ -301,7 +301,7 @@ def _check_bam_contigs(in_bam, ref_file, config):
warnings.append("Extra reference chromosomes: %s" % rc)
if problems:
raise ValueError("Unexpected order, name or contig mismatches between input BAM and reference file:\n%s\n"
"Setting `bam_clean: picard` in the configuration can often fix this issue."
"Setting `bam_clean: remove_extracontigs` in the configuration can often fix this issue."
% "\n".join(problems))
if warnings:
print("*** Potential problems in input BAM compared to reference:\n%s\n" %
......
......@@ -32,18 +32,17 @@ def sample_callable_bed(bam_file, ref_file, data):
"""
from bcbio.heterogeneity import chromhacks
CovInfo = collections.namedtuple("CovInfo", "callable, raw_callable, depth_files")
noalt_calling = "noalt_calling" in dd.get_tools_on(data)
noalt_calling = "noalt_calling" in dd.get_tools_on(data) or "altcontigs" in dd.get_exclude_regions(data)
def callable_chrom_filter(r):
"""Filter to callable region, potentially limiting by chromosomes.
"""
return r.name == "CALLABLE" and (not noalt_calling or chromhacks.is_nonalt(r.chrom))
config = data["config"]
out_file = "%s-callable_sample.bed" % os.path.splitext(bam_file)[0]
with shared.bedtools_tmpdir({"config": config}):
with shared.bedtools_tmpdir(data):
callable_bed, depth_files = coverage.calculate(bam_file, data)
input_regions_bed = config["algorithm"].get("variant_regions", None)
input_regions_bed = dd.get_variant_regions(data)
if not utils.file_uptodate(out_file, callable_bed):
with file_transaction(config, out_file) as tx_out_file:
with file_transaction(data, out_file) as tx_out_file:
callable_regions = pybedtools.BedTool(callable_bed)
filter_regions = callable_regions.filter(callable_chrom_filter)
if input_regions_bed:
......@@ -101,11 +100,11 @@ def _combine_regions(all_regions, ref_regions):
bed_lines = ["%s\t%s\t%s" % (c, s, e) for (c, s, e) in all_intervals]
return pybedtools.BedTool("\n".join(bed_lines), from_string=True)
def _add_config_regions(nblock_regions, ref_regions, config):
def _add_config_regions(nblock_regions, ref_regions, data):
"""Add additional nblock regions based on configured regions to call.
Identifies user-defined regions that we should not be analyzing.
"""
input_regions_bed = config["algorithm"].get("variant_regions", None)
input_regions_bed = dd.get_variant_regions(data)
if input_regions_bed:
input_regions = pybedtools.BedTool(input_regions_bed)
# work around problem with single region not subtracted correctly.
......@@ -119,9 +118,13 @@ def _add_config_regions(nblock_regions, ref_regions, config):
"excludes all genomic regions. Do the chromosome names "
"in the BED file match your genome (chr1 vs 1)?" % input_regions_bed)
all_intervals = _combine_regions([input_nblock, nblock_regions], ref_regions)
return all_intervals.merge()
else:
return nblock_regions
all_intervals = nblock_regions
if "noalt_calling" in dd.get_tools_on(data) or "altcontigs" in dd.get_exclude_regions(data):
from bcbio.heterogeneity import chromhacks
remove_intervals = ref_regions.filter(lambda r: not chromhacks.is_nonalt(r.chrom))
all_intervals = _combine_regions([all_intervals, remove_intervals], ref_regions)
return all_intervals.merge()
class NBlockRegionPicker:
"""Choose nblock regions reasonably spaced across chromosomes.
......@@ -179,15 +182,14 @@ def block_regions(callable_bed, in_bam, ref_file, data):
Identifies islands of callable regions, surrounded by regions
with no read support, that can be analyzed independently.
"""
config = data["config"]
min_n_size = int(config["algorithm"].get("nomap_split_size", 250))
with shared.bedtools_tmpdir({"config": config}):
min_n_size = int(data["config"]["algorithm"].get("nomap_split_size", 250))
with shared.bedtools_tmpdir(data):
nblock_bed = "%s-nblocks.bed" % utils.splitext_plus(callable_bed)[0]
callblock_bed = "%s-callableblocks.bed" % utils.splitext_plus(callable_bed)[0]
if not utils.file_uptodate(nblock_bed, callable_bed):
ref_regions = get_ref_bedtool(ref_file, config)
ref_regions = get_ref_bedtool(ref_file, data["config"])
nblock_regions = _get_nblock_regions(callable_bed, min_n_size, ref_regions)
nblock_regions = _add_config_regions(nblock_regions, ref_regions, config)
nblock_regions = _add_config_regions(nblock_regions, ref_regions, data)
with file_transaction(data, nblock_bed, callblock_bed) as (tx_nblock_bed, tx_callblock_bed):
nblock_regions.filter(lambda r: len(r) > min_n_size).saveas(tx_nblock_bed)
if len(ref_regions.subtract(nblock_regions, nonamecheck=True)) > 0:
......@@ -195,7 +197,7 @@ def block_regions(callable_bed, in_bam, ref_file, data):
else:
raise ValueError("No callable regions found from BAM file. Alignment regions might "
"not overlap with regions found in your `variant_regions` BED: %s" % in_bam)
return callblock_bed, nblock_bed, callable_bed
return callblock_bed, nblock_bed
def _write_bed_regions(data, final_regions, out_file, out_file_ref):
ref_file = tz.get_in(["reference", "fasta", "base"], data)
......
......@@ -40,7 +40,10 @@ def _trim_adapters(fastq_files, out_dir, data):
MYSEQUENCEAAAARETPADA -> MYSEQUENCEAAAA (no polyA trim)
"""
to_trim = _get_sequences_to_trim(data["config"], SUPPORTED_ADAPTERS)
out_files, report_file = _atropos_trim(fastq_files, to_trim, out_dir, data)
if dd.get_trim_reads(data) == "fastp":
out_files, report_file = _fastp_trim(fastq_files, to_trim, out_dir, data)
else:
out_files, report_file = _atropos_trim(fastq_files, to_trim, out_dir, data)
# quality_format = _get_quality_format(data["config"])
# out_files = replace_directory(append_stem(fastq_files, "_%s.trimmed" % name), out_dir)
# log_file = "%s_log_cutadapt.txt" % splitext_plus(out_files[0])[0]
......@@ -52,6 +55,8 @@ def _trim_adapters(fastq_files, out_dir, data):
# open(log_file, 'w').write(content)
return out_files
# ## Atropos trimming
def _atropos_trim(fastq_files, adapters, out_dir, data):
"""Perform multicore trimming with atropos.
"""
......@@ -63,16 +68,24 @@ def _atropos_trim(fastq_files, adapters, out_dir, data):
tx_report_file, tx_out1 = tx_out[:2]
if len(tx_out) > 2:
tx_out2 = tx_out[2]
adapters_args = " ".join(["-a %s" % a for a in adapters])
# polyX trimming, anchored to the 3' ends of reads
if "polyx" in dd.get_adapters(data):
adapters += ["A{200}$", "C{200}$", "G{200}$", "T{200}$"]
adapters_args = " ".join(["-a '%s'" % a for a in adapters])
adapters_args += " --overlap 8" # Avoid very short internal matches (default is 3)
adapters_args += " --no-default-adapters --no-cache-adapters" # Prevent GitHub queries and saving pickles
aligner_args = "--aligner adapter"
if len(fastq_files) == 1:
cores = dd.get_num_cores(data)
input_args = "-se %s" % objectstore.cl_input(fastq_files[0])
output_args = "-o >(bgzip --threads %s -c > {tx_out1})".format(**locals())
output_args = "-o >(bgzip --threads {cores} -c > {tx_out1})".format(**locals())
else:
assert len(fastq_files) == 2, fastq_files
adapters_args = adapters_args + " " + " ".join(["-A %s" % a for a in adapters])
cores = max(1, dd.get_num_cores(data) // 2)
adapters_args = adapters_args + " " + " ".join(["-A '%s'" % a for a in adapters])
input_args = "-pe1 %s -pe2 %s" % tuple([objectstore.cl_input(x) for x in fastq_files])
output_args = "-o >(bgzip -c > {tx_out1}) -p >(bgzip -c > {tx_out2})".format(**locals())
output_args = ("-o >(bgzip --threads {cores} -c > {tx_out1}) "
"-p >(bgzip --threads {cores} -c > {tx_out2})").format(**locals())
quality_base = "64" if dd.get_quality_format(data).lower() == "illumina" else "33"
sample_name = dd.get_sample_name(data)
report_args = "--report-file %s --report-formats json --sample-id %s" % (tx_report_file,
......@@ -81,16 +94,51 @@ def _atropos_trim(fastq_files, adapters, out_dir, data):
config_utils.get_resources("atropos", data["config"]).get("options", []))
extra_opts = []
for k, alt_ks, v in [("--quality-cutoff", ["-q "], "5"),
("--minimum-length", ["-m "], str(dd.get_min_read_length(data)))]:
("--minimum-length", ["-m "], str(dd.get_min_read_length(data))),
("--nextseq-trim", [], "25")]:
if k not in ropts and not any(alt_k in ropts for alt_k in alt_ks):
extra_opts.append("%s=%s" % (k, v))
extra_opts = " ".join(extra_opts)
thread_args = ("--threads %s" % dd.get_num_cores(data) if dd.get_num_cores(data) > 1 else "")
thread_args = ("--threads %s" % cores if cores > 1 else "")
cmd = ("atropos trim {ropts} {thread_args} --quality-base {quality_base} --format fastq "
"{adapters_args} {input_args} {output_args} {report_args} {extra_opts}")
do.run(cmd.format(**locals()), "Trimming with atropos: %s" % dd.get_sample_name(data))
return out_files, report_file
# ## fastp trimming
def _fastp_trim(fastq_files, adapters, out_dir, data):
"""Perform multicore trimming with fastp (https://github.com/OpenGene/fastp)
"""
report_file = os.path.join(out_dir, "%s-report.json" % utils.splitext_plus(os.path.basename(fastq_files[0]))[0])
out_files = [os.path.join(out_dir, "%s-trimmed.fq.gz" % utils.splitext_plus(os.path.basename(x))[0])
for x in fastq_files]
if not utils.file_exists(out_files[0]):
with file_transaction(data, *[report_file] + out_files) as tx_out:
tx_report = tx_out[0]
tx_out_files = tx_out[1:]
cmd = ["fastp", "--thread", dd.get_num_cores(data)]
if dd.get_quality_format(data).lower() == "illumina":
cmd += ["--phred64"]
for i, (inf, outf) in enumerate(zip(fastq_files, tx_out_files)):
if i == 0:
cmd += ["-i", inf, "-o", outf]
else:
cmd += ["-I", inf, "-O", outf]
cmd += ["--trim_poly_g", "--poly_g_min_len", "8",
"--cut_by_quality3", "--cut_mean_quality", "5",
"--length_required", str(dd.get_min_read_length(data)),
"--disable_quality_filtering"]
if "polyx" in dd.get_adapters(data):
cmd += ["--trim_poly_x", "--poly_x_min_len", "8"]
for a in adapters:
cmd += ["--adapter_sequence", a]
if not adapters:
cmd += ["--disable_adapter_trimming"]
cmd += ["--json", report_file, "--report_title", dd.get_sample_name(data)]
do.run(cmd, "Trimming with fastp: %s" % dd.get_sample_name(data))
return out_files, report_file
def _get_sequences_to_trim(config, builtin):
builtin_adapters = _get_builtin_adapters(config, builtin)
polya = builtin_adapters.get("polya", [None])[0]
......
......@@ -26,7 +26,7 @@ def get_default_jvm_opts(tmp_dir=None, parallel_gc=False):
Avoids issues with multiple spun up Java processes running into out of memory errors.
Parallel GC can use a lot of cores on big machines and primarily helps improve task latency
and responsiveness, which are not needed for batch jobs.
https://github.com/chapmanb/bcbio-nextgen/issues/532#issuecomment-50989027
https://github.com/bcbio/bcbio-nextgen/issues/532#issuecomment-50989027
https://wiki.csiro.au/pages/viewpage.action?pageId=545034311
http://stackoverflow.com/questions/9738911/javas-serial-garbage-collector-performing-far-better-than-other-garbage-collect
However, serial GC causes issues with Spark local runs so we use parallel for those cases:
......
import os
import sys
import toolz as tz
from bcbio import utils
from bcbio import bam
from bcbio.pipeline import config_utils
import bcbio.pipeline.datadict as dd
from bcbio.ngsalign import bowtie2, bwa
from bcbio.distributed.transaction import file_transaction
from bcbio.provenance import do
from bcbio.log import logger
def clean_chipseq_alignment(data):
aligner = dd.get_aligner(data)
data["raw_bam"] = dd.get_work_bam(data)
data["align_bam"] = dd.get_work_bam(data)
if aligner:
if aligner == "bowtie2":
filterer = bowtie2.filter_multimappers
......@@ -19,4 +26,47 @@ def clean_chipseq_alignment(data):
else:
logger.info("Warning: When BAM file is given as input, bcbio skips multimappers removal."
"If BAM is not cleaned for peak calling, can result in downstream errors.")
# lcr_bed = utils.get_in(data, ("genome_resources", "variation", "lcr"))
encode_bed = tz.get_in(["genome_resources", "variation", "encode_blacklist"], data)
if encode_bed:
data["work_bam"] = _prepare_bam(data["work_bam"], encode_bed, data['config'])
bam.index(data["work_bam"], data['config'])
data["bigwig"] = _bam_coverage(dd.get_sample_name(data), dd.get_work_bam(data), data)
return [[data]]
def _prepare_bam(bam_file, bed_file, config):
"""Remove regions from bed files"""
if not bam_file or not bed_file:
return bam_file
out_file = utils.append_stem(bam_file, '_filter')
bedtools = config_utils.get_program("bedtools", config)
if not utils.file_exists(out_file):
with file_transaction(out_file) as tx_out:
cmd = "{bedtools} subtract -nonamecheck -A -a {bam_file} -b {bed_file} > {tx_out}"
do.run(cmd.format(**locals()), "Clean %s" % bam_file)
return out_file
def get_genome(genome):
from bcbio.chipseq import macs2
loaded = macs2.HS
if genome in loaded:
return loaded[genome]
def _bam_coverage(name, bam_input, data):
"""Run bamCoverage from deeptools"""
cmd = ("{bam_coverage} -b {bam_input} -o {bw_output} "
"--binSize 20 --effectiveGenomeSize {size} "
"--smoothLength 60 --extendReads 150 --centerReads -p {cores}")
size = int(get_genome(dd.get_genome_build(data)))
cores = dd.get_num_cores(data)
try:
bam_coverage = config_utils.get_program("bamCoverage", data)
except config_utils.CmdNotFound:
logger.info("No bamCoverage found, skipping bamCoverage.")
return None
bw_output = os.path.join(os.path.dirname(bam_input), "%s.bw" % name)
if utils.file_exists(bw_output):
return bw_output
with file_transaction(bw_output) as out_tx:
do.run(cmd.format(**locals()), "Run bamCoverage in %s" % name)
return bw_output
......@@ -8,11 +8,11 @@ from bcbio.provenance import do
from bcbio.pipeline import config_utils
from bcbio import bam
HS = {"hg19": "2.7e9",
"GRCh37": "2.7e9",
"hg38": "2.7e9",
"mm10": "1.87e9",
"dm3": "1.2e8"}
HS = {"hg19": 2.7e9,
"GRCh37": 2.7e9,
"hg38": 2.7e9,
"mm10": 1.87e9,
"dm3": 1.2e8}
def run(name, chip_bam, input_bam, genome_build, out_dir, method, resources, config):
"""
......
......@@ -44,38 +44,19 @@ def peakcall_prepare(data, run_parallel):
def calling(data):
"""Main function to parallelize peak calling."""
chip_bam = dd.get_work_bam(data)
chip_bam = data.get("work_bam")
input_bam = data.get("work_bam_input", None)
caller_fn = get_callers()[data["peak_fn"]]
name = dd.get_sample_name(data)
out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), data["peak_fn"], name))
encode_bed = tz.get_in(["genome_resources", "variation", "encode_blacklist"], data)
# lcr_bed = utils.get_in(data, ("genome_resources", "variation", "lcr"))
if encode_bed:
chip_bam = _prepare_bam(chip_bam, encode_bed, data['config'])
data["work_bam_filter"] = chip_bam
input_bam = _prepare_bam(input_bam, encode_bed, data['config'])
data["input_bam_filter"] = input_bam
out_files = caller_fn(name, chip_bam, input_bam, dd.get_genome_build(data), out_dir,
dd.get_chip_method(data), data["resources"], data["config"])
greylistdir = greylisting(data)
data.update({"peaks_files": out_files})
# data["input_bam_filter"] = input_bam
if greylistdir:
data["greylist"] = greylistdir
return [[data]]
data["input_bam_filter"] = input_bam
def _prepare_bam(bam_file, bed_file, config):
"""Remove regions from bed files"""
if not bam_file or not bed_file:
return bam_file
out_file = utils.append_stem(bam_file, '_filter')
bedtools = config_utils.get_program("bedtools", config)
if not utils.file_exists(out_file):
with file_transaction(out_file) as tx_out:
cmd = "{bedtools} subtract -nonamecheck -A -a {bam_file} -b {bed_file} > {tx_out}"
do.run(cmd.format(**locals()), "Clean %s" % bam_file)
return out_file
def _sync(original, processed):
"""
......@@ -87,7 +68,7 @@ def _sync(original, processed):
original_sample[0]["peaks_files"] = {}
for process_sample in processed:
if dd.get_sample_name(original_sample[0]) == dd.get_sample_name(process_sample[0]):
for key in ["peaks_file", "work_bam_filter", "input_bam_filter"]:
for key in ["peaks_files"]:
if process_sample[0].get(key):
original_sample[0][key] = process_sample[0][key]
return original
......@@ -100,7 +81,7 @@ def _check(sample, data):
return None
for origin in data:
if dd.get_batch(sample) in (dd.get_batches(origin[0]) or []) and dd.get_phenotype(origin[0]) == "input":
sample["work_bam_input"] = dd.get_work_bam(origin[0])
sample["work_bam_input"] = origin[0].get("work_bam")
return [sample]
return [sample]
......@@ -150,3 +131,4 @@ def greylisting(data):
% dd.get_sample_name(data))
return None
return greylistdir
......@@ -759,6 +759,7 @@ def _directory_tarball(dirname):
tarball_dir = os.path.join(extra_tarball, tarball_dir)
tarball = os.path.join(base_dir, "%s-wf.tar.gz" % (tarball_dir.replace(os.path.sep, "--")))
if not utils.file_exists(tarball):
print("Preparing CWL input tarball: %s" % tarball)
with utils.chdir(base_dir):
with tarfile.open(tarball, "w:gz") as tar:
tar.add(tarball_dir)
......
......@@ -12,8 +12,9 @@ import tarfile
import toolz as tz
from bcbio import utils
from bcbio import bam, utils
from bcbio.pipeline import datadict as dd
from bcbio.variation import vcfutils
def to_rec(samples, default_keys=None):
"""Convert inputs into CWL records, useful for single item parallelization.
......@@ -190,3 +191,70 @@ def samples_to_records(samples, default_keys=None):
data["metadata"] = run_info.add_metadata_defaults(data.get("metadata", {}))
out.append(data)
return out
def assign_complex_to_samples(items):
"""Assign complex inputs like variants and align outputs to samples.
Handles list inputs to record conversion where we have inputs from multiple
locations and need to ensure they are properly assigned to samples in many
environments.
The unpleasant approach here is to use standard file naming to match
with samples so this can work in environments where we don't download/stream
the input files (for space/time savings).
"""
extract_fns = {("variants", "samples"): _get_vcf_samples,
("align_bam",): _get_bam_samples}
complex = {k: {} for k in extract_fns.keys()}
for data in items:
for k in complex:
v = tz.get_in(k, data)
if v is not None:
for s in extract_fns[k](v, items):
if s:
complex[k][s] = v
out = []
for data in items:
for k in complex:
newv = tz.get_in([k, dd.get_sample_name(data)], complex)
if newv:
data = tz.update_in(data, k, lambda x: newv)
out.append(data)
return out
def _get_vcf_samples(calls, items):
have_full_file = False
all_samples = set([])
sample_matches = False
for f in utils.flatten(calls):
if have_full_file:
cur = set(vcfutils.get_samples(f))
if cur:
if not all_samples:
all_samples = cur
else:
all_samples &= set(cur)
else:
for data in items:
for i, test_name in enumerate([dd.get_sample_name(data)] + dd.get_batches(data)):
if os.path.basename(f).startswith(("%s-" % test_name,
"%s." % test_name)):
# Prefer matches to single samples (gVCF) over joint batches
if i == 0:
sample_matches = True
if sample_matches and i > 0:
continue
else:
all_samples.add(dd.get_sample_name(data))
return list(all_samples)
def _get_bam_samples(f, items):
have_full_file = False
if have_full_file:
return [bam.sample_name(f)]
else:
for data in items:
if os.path.basename(f).startswith(("%s-" % dd.get_sample_name(data),
"%s." % dd.get_sample_name(data))):
return [dd.get_sample_name(data)]
return []
......@@ -69,7 +69,7 @@ def et(name, parallel, inputs, outputs, expression):
ExpressionTool = collections.namedtuple("ExpressionTool", "name inputs outputs expression parallel")
return ExpressionTool(name, inputs, outputs, expression, parallel)
def cwlout(key, valtype=None, extensions=None, fields=None):
def cwlout(key, valtype=None, extensions=None, fields=None, exclude=None):
"""Definition of an output variable, defining the type and associated secondary files.
"""
out = {"id": key}
......@@ -79,6 +79,8 @@ def cwlout(key, valtype=None, extensions=None, fields=None):
out["fields"] = fields
if extensions:
out["secondaryFiles"] = extensions
if exclude:
out["exclude"] = exclude
return out
def _alignment(checkpoints):
......@@ -88,7 +90,7 @@ def _alignment(checkpoints):
fields=[cwlout(["files"], ["null", {"type": "array", "items": "File"}], [".gbi"]),
cwlout(["config", "algorithm", "quality_format"], ["string", "null"]),
cwlout(["align_split"], ["string", "null"])])],
"bcbio-vc", ["grabix", "htslib", "biobambam"],
"bcbio-vc", ["grabix", "htslib", "biobambam", "atropos;env=python3"],
disk={"files": 1.5}),
s("process_alignment", "single-parallel" if checkpoints["align_split"] else "single-single",
[["alignment_rec"], ["process_alignment_rec"]],
......@@ -154,7 +156,7 @@ def _variant_vc(checkpoints):
vc_wf += [s("postprocess_variants", "batch-single",
[["batch_rec"], ["vrn_file"]],
[cwlout(["vrn_file"], "File", [".tbi"])],
"bcbio-vc", ["snpeff=4.3i"], disk={"files": 0.5})]
"bcbio-vc", ["snpeff=4.3.1t"], disk={"files": 0.5})]
vc_wf += [s("compare_to_rm", "batch-single",
[["batch_rec"], ["vrn_file"]],
[cwlout("vc_rec", "record",
......@@ -163,7 +165,9 @@ def _variant_vc(checkpoints):
cwlout(["validate", "tp"], ["File", "null"], [".tbi"]),
cwlout(["validate", "fp"], ["File", "null"], [".tbi"]),
cwlout(["validate", "fn"], ["File", "null"], [".tbi"]),
cwlout("inherit")])],
cwlout("inherit", exclude=[["align_bam"], ["reference", "twobit"],
["reference", "snpeff"], ["reference", "rtg"],
["genome_resources", "variation"]])])],
"bcbio-vc", ["bcftools", "bedtools", "pythonpy", "gvcf-regions",
"htslib", "rtg-tools", "vcfanno"],
disk={"files": 1.5})]
......@@ -173,6 +177,8 @@ def _variant_vc(checkpoints):
["metadata", "batch"], ["metadata", "phenotype"],
["regions", "sample_callable"], ["config", "algorithm", "variantcaller"],
["config", "algorithm", "coverage_interval"],
["config", "algorithm", "effects"],
["config", "algorithm", "exclude_regions"],
["config", "algorithm", "variant_regions"],
["config", "algorithm", "validate"], ["config", "algorithm", "validate_regions"],
["config", "algorithm", "tools_on"],
......@@ -180,6 +186,8 @@ def _variant_vc(checkpoints):
["reference", "fasta", "base"], ["reference", "twobit"],
["reference", "rtg"], ["reference", "genome_context"],
["genome_resources", "variation", "cosmic"], ["genome_resources", "variation", "dbsnp"],
["genome_resources", "variation", "lcr"], ["genome_resources", "variation", "polyx"],
["genome_resources", "variation", "encode_blacklist"],
["genome_resources", "aliases", "ensembl"], ["genome_resources", "aliases", "human"],
["genome_resources", "aliases", "snpeff"], ["reference", "snpeff", "genome_build"]],
[cwlout("batch_rec", "record")],
......@@ -221,7 +229,7 @@ def _variant_jointvc():
s("postprocess_variants", "batch-single",
[["jointvc_batch_rec"], ["vrn_file_joint"]],
[cwlout(["vrn_file_joint"], "File", [".tbi"])],
"bcbio-vc", ["snpeff=4.3i"],
"bcbio-vc", ["snpeff=4.3.1t"],
disk={"files": 1.5}),
s("finalize_jointvc", "batch-single",
[["jointvc_batch_rec"], ["vrn_file_joint"]],
......@@ -241,14 +249,12 @@ def _variant_checkpoints(samples):
"""Check sample configuration to identify required steps in analysis.
"""
checkpoints = {}
checkpoints["vc"] = any([dd.get_variantcaller(d) for d in samples])
checkpoints["vc"] = any([dd.get_variantcaller(d) or d.get("vrn_file") for d in samples])
checkpoints["sv"] = any([dd.get_svcaller(d) for d in samples])
checkpoints["jointvc"] = any([(dd.get_jointcaller(d) or ("gvcf" in dd.get_tools_on(d))) and dd.get_batch(d)
for d in samples])
checkpoints["hla"] = any([dd.get_hlacaller(d) for d in samples])
# Currently always have alignment on until expression tool widely supported
checkpoints["align"] = True
#checkpoints["align"] = any([(dd.get_aligner(d) or dd.get_bam_clean(d)) for d in samples])
checkpoints["align"] = any([(dd.get_aligner(d) or dd.get_bam_clean(d)) for d in samples])
checkpoints["align_split"] = not all([(dd.get_align_split_size(d) is False or
not dd.get_aligner(d))
for d in samples])
......@@ -268,6 +274,8 @@ def variant(samples):
["rgnames", "lane"], ["rgnames", "rg"], ["rgnames", "lb"],
["reference", "aligner", "indexes"],
["config", "algorithm", "aligner"],
["config", "algorithm", "trim_reads"],
["config", "algorithm", "adapters"],
["config", "algorithm", "bam_clean"],
["config", "algorithm", "mark_duplicates"]],
[cwlout("alignment_rec", "record")],
......@@ -277,13 +285,13 @@ def variant(samples):
[["align_split"], ["process_alignment_rec"],
["work_bam"], ["config", "algorithm", "quality_format"]])]
else:
align = [et("organize_align_bam", "multi-parallel",
["files"],
[cwlout(["align_bam"], ["File", "null"], [".bai"]),
cwlout(["work_bam_plus", "disc"], "null"),
cwlout(["work_bam_plus", "sr"], "null"),
cwlout(["hla", "fastq"], "null")],
"""${return {"align_bam": inputs.files[0]}}""")]
align = [s("organize_noalign", "multi-parallel",
["files"],
[cwlout(["align_bam"], "File", [".bai"]),
cwlout(["work_bam_plus", "disc"], ["File", "null"]),
cwlout(["work_bam_plus", "sr"], ["File", "null"]),
cwlout(["hla", "fastq"], ["File", "null"])],
"bcbio-vc", cores=1, no_files=True)]
align += [s("prep_samples_to_rec", "multi-combined",
[["config", "algorithm", "coverage"],
["config", "algorithm", "variant_regions"],
......@@ -305,6 +313,7 @@ def variant(samples):
s("postprocess_alignment_to_rec", "multi-combined",
[["align_bam"],
["config", "algorithm", "coverage_interval"],
["config", "algorithm", "exclude_regions"],
["config", "algorithm", "variant_regions"],
["config", "algorithm", "variant_regions_merged"],
["config", "algorithm", "variant_regions_orig"],
......@@ -316,6 +325,8 @@ def variant(samples):
["config", "algorithm", "tools_on"],
["genome_resources", "rnaseq", "gene_bed"],
["genome_resources", "variation", "dbsnp"],
["genome_resources", "variation", "lcr"], ["genome_resources", "variation", "polyx"],
["genome_resources", "variation", "encode_blacklist"],
["reference", "twobit"],
["reference", "fasta", "base"]],
[cwlout("postprocess_alignment_rec", "record")],
......@@ -368,7 +379,7 @@ def _qc_workflow(checkpoints):
qc_inputs = \
[["align_bam"], ["analysis"], ["reference", "fasta", "base"],
["config", "algorithm", "tools_on"], ["config", "algorithm", "tools_off"],
["genome_build"], ["config", "algorithm", "qc"],
["genome_build"], ["config", "algorithm", "qc"], ["metadata", "batch"],
["config", "algorithm", "coverage_interval"],
["depth", "variant_regions", "regions"], ["depth", "variant_regions", "dist"],
["depth", "samtools", "stats"], ["depth", "samtools", "idxstats"],
......@@ -414,11 +425,12 @@ def _variant_sv(checkpoints):
[cwlout("sv_rec", "record",
fields=[cwlout(["sv", "variantcaller"], ["string", "null"]),
cwlout(["sv", "vrn_file"], ["File", "null"], [".tbi"]),
cwlout(["svvalidate", "summary"], ["File", "null"]),
cwlout("inherit")])],
"bcbio-vc", ["bedtools", "cnvkit", "delly", "extract-sv-reads",
"lumpy-sv", "manta", "break-point-inspector", "mosdepth", "samtools",
"pysam>=0.13.0",
"seq2c", "simple_sv_annotation", "svtools", "svtyper",
"smoove", "pysam>=0.13.0",
"seq2c", "simple_sv_annotation", "survivor", "svtools", "svtyper",
"r=3.4.1", "vawk"],
disk={"files": 2.0})]
steps = [s("calculate_sv_bins", "multi-combined",
......@@ -426,11 +438,14 @@ def _variant_sv(checkpoints):
["metadata", "batch"], ["metadata", "phenotype"],
["config", "algorithm", "callable_regions"],
["config", "algorithm", "coverage_interval"],
["config", "algorithm", "exclude_regions"],
["config", "algorithm", "sv_regions"],
["config", "algorithm", "variant_regions"],
["config", "algorithm", "variant_regions_merged"],
["config", "algorithm", "svcaller"],
["depth", "variant_regions", "regions"],
["genome_resources", "variation", "lcr"], ["genome_resources", "variation", "polyx"],
["genome_resources", "variation", "encode_blacklist"],
["genome_resources", "rnaseq", "gene_bed"]],
[cwlout("sv_bin_rec", "record",
fields=[cwlout(["regions", "bins", "target"], ["File", "null"]),
......@@ -451,6 +466,7 @@ def _variant_sv(checkpoints):
[["sv_rawcoverage_rec"]],
[cwlout("sv_coverage_rec", "record",
fields=[cwlout(["depth", "bins", "normalized"], ["File", "null"]),
cwlout(["depth", "bins", "background"], ["File", "null"]),
cwlout("inherit")])],
"bcbio-vc", ["cnvkit"],
disk={"files": 1.5}),
......@@ -459,6 +475,7 @@ def _variant_sv(checkpoints):
["work_bam_plus", "disc"], ["work_bam_plus", "sr"],
["config", "algorithm", "tools_on"],
["config", "algorithm", "tools_off"],
["config", "algorithm", "svvalidate"], ["regions", "sample_callable"],
["sv_coverage_rec"]],
[cwlout("sv_batch_rec", "record")],
"bcbio-vc",
......@@ -466,9 +483,11 @@ def _variant_sv(checkpoints):
w("svcall", "multi-parallel", sv, []),
s("summarize_sv", "multi-combined",
[["sv_rec"]],
[cwlout(["sv", "calls"], {"type": "array", "items": ["File", "null"]})],
[cwlout(["sv", "calls"], {"type": "array", "items": ["File", "null"]}),
cwlout(["svvalidate", "grading_summary"], ["File", "null"]),
cwlout(["svvalidate", "grading_plots"], {"type": "array", "items": ["File", "null"]})],
"bcbio-vc", disk={"files": 1.0}, cores=1)]
final_outputs = [["sv", "calls"]]
final_outputs = [["sv", "calls"], ["svvalidate", "grading_summary"]]
return steps, final_outputs
def rnaseq(samples):
......
......@@ -64,6 +64,7 @@ def _run_cwltool(args):
main_file, json_file, project_name = _get_main_and_json(args.directory)
work_dir = utils.safe_makedir(os.path.join(os.getcwd(), "cwltool_work"))
tmp_dir = utils.safe_makedir(os.path.join(work_dir, "tmpcwl"))
log_file = os.path.join(work_dir, "%s-cwltool.log" % project_name)
os.environ["TMPDIR"] = tmp_dir
flags = ["--tmpdir-prefix", tmp_dir, "--tmp-outdir-prefix", tmp_dir]
if args.no_container:
......@@ -71,7 +72,7 @@ def _run_cwltool(args):
flags += ["--no-container", "--preserve-environment", "PATH", "--preserve-environment", "HOME"]
cmd = ["cwltool"] + flags + args.toolargs + ["--", main_file, json_file]
with utils.chdir(work_dir):
_run_tool(cmd, not args.no_container, work_dir)
_run_tool(cmd, not args.no_container, work_dir, log_file=log_file)
def _run_arvados(args):
"""Run CWL on Arvados.
......
......@@ -233,9 +233,11 @@ def _flatten_nested_input(v):
new_type = x["items"]
elif isinstance(x, basestring) and x == "null":
want_null = True
else:
new_type = x
if want_null:
if not isinstance(new_type, (list, tuple)):
new_type = [new_type]
new_type = [new_type] if new_type is not None else []
for toadd in ["null", "string"]:
if toadd not in new_type:
new_type.append(toadd)
......@@ -343,10 +345,12 @@ def _create_record(name, field_defs, step_name, inputs, unlist, file_vs, std_vs,
fields = []
inherit = []
inherit_all = False
inherit_exclude = []
for fdef in field_defs:
if not fdef.get("type"):
if fdef["id"] == "inherit":
inherit_all = True
inherit_exclude = fdef.get("exclude", [])
else:
inherit.append(fdef["id"])
else:
......@@ -354,7 +358,7 @@ def _create_record(name, field_defs, step_name, inputs, unlist, file_vs, std_vs,
"type": fdef["type"]}
fields.append(_add_secondary_to_rec_field(fdef, cur))
if inherit_all:
fields.extend(_infer_record_outputs(inputs, unlist, file_vs, std_vs, parallel))
fields.extend(_infer_record_outputs(inputs, unlist, file_vs, std_vs, parallel, exclude=inherit_exclude))
elif inherit:
fields.extend(_infer_record_outputs(inputs, unlist, file_vs, std_vs, parallel, inherit))
else:
......@@ -373,13 +377,15 @@ def _add_secondary_to_rec_field(orig, cur):
cur["secondaryFiles"] = orig.get("secondaryFiles")
return cur
def _infer_record_outputs(inputs, unlist, file_vs, std_vs, parallel, to_include=None):
def _infer_record_outputs(inputs, unlist, file_vs, std_vs, parallel, to_include=None,
exclude=None):
"""Infer the outputs of a record from the original inputs
"""
fields = []
unlist = set([_get_string_vid(x) for x in unlist])
input_vids = set([_get_string_vid(v) for v in _handle_special_inputs(inputs, file_vs)])
to_include = set([_get_string_vid(x) for x in to_include]) if to_include else None
to_exclude = tuple(set([_get_string_vid(x) for x in exclude])) if exclude else None
added = set([])
for raw_v in std_vs + [v for v in file_vs if get_base_id(v["id"]) in input_vids]:
# unpack record inside this record and un-nested inputs to avoid double nested
......@@ -392,13 +398,14 @@ def _infer_record_outputs(inputs, unlist, file_vs, std_vs, parallel, to_include=
for orig_v in nested_vs:
if (get_base_id(orig_v["id"]) not in added
and (not to_include or get_base_id(orig_v["id"]) in to_include)):
cur_v = {}
cur_v["name"] = get_base_id(orig_v["id"])
cur_v["type"] = orig_v["type"]
if cur_v["name"] in unlist:
cur_v = _flatten_nested_input(cur_v)
fields.append(_add_secondary_to_rec_field(orig_v, cur_v))
added.add(get_base_id(orig_v["id"]))
if to_exclude is None or not get_base_id(orig_v["id"]).startswith(to_exclude):
cur_v = {}
cur_v["name"] = get_base_id(orig_v["id"])
cur_v["type"] = orig_v["type"]
if cur_v["name"] in unlist:
cur_v = _flatten_nested_input(cur_v)
fields.append(_add_secondary_to_rec_field(orig_v, cur_v))
added.add(get_base_id(orig_v["id"]))
return fields
def _create_variable(orig_v, step, variables):
......
......@@ -9,7 +9,7 @@ from bcbio.cwl import create as cwl_create
from bcbio.cwl import cwlutils
from bcbio.rnaseq import (sailfish, rapmap, salmon, umi, kallisto, spikein)
from bcbio.ngsalign import alignprep
from bcbio.pipeline import (archive, disambiguate, qcsummary, region, sample,
from bcbio.pipeline import (archive, alignment, disambiguate, qcsummary, region, sample,
main, shared, variation, run_info, rnaseq)
from bcbio.qc import multiqc, qsignature
from bcbio.structural import regions as svregions
......@@ -122,6 +122,10 @@ def alignment_to_rec(*args):
"rgnames__lane", "rgnames__rg", "rgnames__lb"]
return cwlutils.to_rec_single(*args, default_keys=default_keys)
@utils.map_wrap
def organize_noalign(*args):
return alignment.organize_noalign(args)
@utils.map_wrap
def postprocess_alignment_to_rec(*args):
default_keys = ["config__algorithm__coverage_interval", "config__algorithm__seq2c_bed_ready",
......
......@@ -26,7 +26,8 @@ def _get_calls(data, cnv_only=False):
def get_variants(data):
"""Retrieve set of variant calls to use for heterogeneity analysis.
"""
supported = ["vardict", "vardict-java", "vardict-perl", "strelka2", "mutect2", "freebayes", "mutect"]
supported = ["precalled", "vardict", "vardict-java", "vardict-perl",
"strelka2", "mutect2", "freebayes", "mutect"]
out = []
for v in data.get("variants", []):
if v["variantcaller"] in supported:
......
......@@ -29,10 +29,10 @@ from bcbio.distributed.transaction import file_transaction
from bcbio.pipeline import datadict as dd
REMOTES = {
"requirements": "https://raw.githubusercontent.com/chapmanb/bcbio-nextgen/master/requirements-conda.txt",
"gitrepo": "https://github.com/chapmanb/bcbio-nextgen.git",
"requirements": "https://raw.githubusercontent.com/bcbio/bcbio-nextgen/master/requirements-conda.txt",
"gitrepo": "https://github.com/bcbio/bcbio-nextgen.git",
"cloudbiolinux": "https://github.com/chapmanb/cloudbiolinux/archive/master.tar.gz",
"genome_resources": "https://raw.github.com/chapmanb/bcbio-nextgen/master/config/genomes/%s-resources.yaml",
"genome_resources": "https://raw.github.com/bcbio/bcbio-nextgen/master/config/genomes/%s-resources.yaml",
"snpeff_dl_url": ("http://downloads.sourceforge.net/project/snpeff/databases/v{snpeff_ver}/"
"snpEff_v{snpeff_ver}_{genome}.zip")}
SUPPORTED_GENOMES = ["GRCh37", "hg19", "hg38", "hg38-noalt", "mm10", "mm9",
......@@ -222,9 +222,10 @@ def _check_for_conda_problems():
"""
conda_bin = _get_conda_bin()
lib_dir = os.path.join(os.path.dirname(conda_bin), os.pardir, "lib")
if not os.path.exists(os.path.join(lib_dir, "libquadmath.so")):
subprocess.check_call([conda_bin, "install", "-f",
"--yes", "-c", "bioconda", "-c", "conda-forge", "libgcc-ng"])
for l in ["libgomp.so.1", "libquadmath.so"]:
if not os.path.exists(os.path.join(lib_dir, l)):
subprocess.check_call([conda_bin, "install", "-f",
"--yes", "-c", "bioconda", "-c", "conda-forge", "libgcc-ng"])
def _update_conda_packages():
"""If installed in an anaconda directory, upgrade conda packages.
......