Skip to content
Commits on Source (8)
Dockerfile
.git
.dockerignore
Brewfile
---
name: Report a Bug
about: Report a bug that causes vg to crash or otherwise behave incorrectly
title: ''
labels: ''
assignees: ''
---
<!--
Please answer the following questions about your bug.
If you copy commands or output from your terminal, please place the text in its own paragraph, surrounded by lines of three backticks.
```
Like this.
```
-->
**1. What were you trying to do?**
**2. What did you want to happen?**
**3. What actually happened?**
**4. If you got a line like `Stack trace path: /somewhere/on/your/computer/stacktrace.txt`, please copy-paste the contents of that file here:**
```
Place stacktrace here.
```
**5. What data and command can the vg dev team use to make the problem happen?**
**6. What does running `vg version` say?**
```
Place vg version output here
```
---
name: Support Request
about: Get help installing or using vg, or get questions answered
title: ''
labels: ''
assignees: ''
---
**PLEASE DO NOT MAKE SUPPORT REQUESTS HERE**
Please use the Biostars forum instead:
https://www.biostars.org/p/new/post/?tag_val=vg
......@@ -42,7 +42,7 @@ test-job:
# Run in parallel, setting CI_NODE_INDEX and CI_NODE_TOTAL
# We will find our share of tests from vgci/test-list.txt and run them
# We ought to run one job per test, but we can wrap around.
parallel: 19
parallel: 16
script:
- docker pull "quay.io/vgteam/vg:ci-${CI_PIPELINE_IID}-${CI_COMMIT_SHA}"
- docker tag "quay.io/vgteam/vg:ci-${CI_PIPELINE_IID}-${CI_COMMIT_SHA}" vgci-docker-vg-local
......
......@@ -22,9 +22,6 @@
[submodule "sha1"]
path = deps/sha1
url = https://github.com/vog/sha1.git
[submodule "protobuf"]
path = deps/protobuf
url = https://github.com/google/protobuf.git
[submodule "gcsa2"]
path = deps/gcsa2
url = https://github.com/jltsiren/gcsa2.git
......@@ -39,7 +36,7 @@
url = https://github.com/sparsehash/sparsehash.git
[submodule "gfakluge"]
path = deps/gfakluge
url = https://github.com/edawson/gfakluge.git
url = https://github.com/vgteam/gfakluge.git
[submodule "deps/DYNAMIC"]
path = deps/DYNAMIC
url = https://github.com/vgteam/DYNAMIC
......@@ -113,3 +110,18 @@
[submodule "deps/libbdsg"]
path = deps/libbdsg
url = https://github.com/vgteam/libbdsg.git
[submodule "deps/xg"]
path = deps/xg
url = https://github.com/vgteam/xg.git
[submodule "deps/gbwtgraph"]
path = deps/gbwtgraph
url = https://github.com/jltsiren/gbwtgraph.git
[submodule "deps/ips4o"]
path = deps/ips4o
url = https://github.com/vgteam/ips4o.git
[submodule "deps/mmmultimap"]
path = deps/mmmultimap
url = https://github.com/ekg/mmmultimap.git
[submodule "vgteam_bbhash"]
path = deps/BBHash
url = https://github.com/vgteam/BBHash.git
......@@ -3,7 +3,7 @@
language: cpp
compiler: gcc
sudo: required
dist: trusty
dist: bionic
# We have some shenanigans to let us cache submodules, and update changed files
# without messing up mtimes and triggering rebuilds unnecessarily. Travis checks
# out our submodules and then restores the cache over them. We move the cached
......@@ -18,14 +18,12 @@ before_install:
- rm -Rf deps
# Keep the cached deps if the right compiler version was used.
# Otherwise start fresh
- if [[ "$TRAVIS_OS_NAME" == "linux" && -e deps_cached/gcc6 ]]; then mv deps_cached deps; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" && -e deps_cached/gcc6 ]]; then mv deps_cached deps; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "0" && -e deps_cached/clang ]]; then mv deps_cached deps; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" && -e "deps_cached/gcc$(gcc -dumpversion)" ]]; then mv deps_cached deps; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && -e deps_cached/clang ]]; then mv deps_cached deps; fi
- (ls -lah deps/; ls -lah bin/; ls -lah lib/; ls -lah include/) || true
- git submodule update --init --recursive
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ls /etc/apt/sources.list.d; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mkdir -p deps; touch deps/gcc6; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mkdir -p deps; touch "deps/gcc$(gcc -dumpversion)"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH"; fi
# Travis can't always actually use Homebrew to install our dependencies
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew bundle --verbose; fi
......@@ -33,13 +31,7 @@ before_install:
# Error: HOMEBREW_LOGS was not exported!
# Please don't worry, you likely hit a bug auto-updating from an old version.
# Rerun your command, everything is up-to-date and fine now.
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" ]]; then brew install gcc6 || brew install gcc6 || true; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" ]]; then brew link --overwrite gcc@6; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" ]]; then mkdir -p ./bin; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" ]]; then ln -sf `which g++-6` ./bin/g++; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" ]]; then ln -sf `which gcc-6` ./bin/gcc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "1" ]]; then mkdir -p deps; touch deps/gcc6; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" && "$INSTALL_GCC" == "0" ]]; then mkdir -p deps; touch deps/clang; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mkdir -p deps; touch deps/clang; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export PATH="/usr/local/opt/bison/bin:$PATH"; fi # Homebrew no longer links Bison even if we try to force it
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export PATH="$(pwd)/bin:/usr/local/opt/coreutils/libexec/gnubin:/usr/local/bin:$PATH"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LD_LIBRARY_PATH=/usr/local/lib/; fi
......@@ -53,8 +45,8 @@ before_install:
install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make get-deps; fi
script:
- if [[ -z "$BUILD_DOCS_ONLY" && "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4 && echo Testing && make test && make static -j4; fi
- if [[ -z "$BUILD_DOCS_ONLY" && "$TRAVIS_OS_NAME" == "osx" ]]; then timeout 1800 make deps -j4 && make -j4 && echo Testing && make test; fi
- if [[ -z "$BUILD_DOCS_ONLY" && "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4 && echo Testing && bin/vg test "Target to alignment extraction" && echo Full Testing && make test && make static -j4; fi
- if [[ -z "$BUILD_DOCS_ONLY" && "$TRAVIS_OS_NAME" == "osx" ]]; then timeout 1800 make deps -j4 && make -j4 && echo Testing && bin/vg test "Target to alignment extraction" && echo Full Testing && make test; fi
- if [[ ! -z "$BUILD_DOCS_ONLY" ]]; then doc/publish-docs.sh; fi
# Cache all our dependency directories, and our lib and include
cache:
......@@ -72,12 +64,8 @@ addons:
homebrew:
brewfile: true
apt:
sources:
- ubuntu-toolchain-r-test
update: true
packages: # Get all the current dependency packages in advance. We will still do make get-deps but it will do less work.
- gcc-6
- g++-6
- bc
- rs
- jq
......@@ -85,6 +73,7 @@ addons:
- cmake
- protobuf-compiler
- libprotoc-dev
- libprotobuf-dev
- libjansson-dev
- libbz2-dev
- libncurses5-dev
......@@ -120,14 +109,8 @@ env:
global:
- DOCS_KEY_ENCRYPTION_LABEL=125272388526
- VG_FULL_TRACEBACK=1
matrix:
- INSTALL_GCC=1
- INSTALL_GCC=0
matrix:
exclude:
- os: linux
env: INSTALL_GCC=1
include:
# We have a special entry to do the docs build
- os: linux
......
brew "jq"
brew "jansson"
brew "protobuf"
brew "md5sha1sum"
brew "samtools"
brew "bison"
......
# Multi-container Dockerfile for build and run containers for vg
# Base stage: the common OS image that both the build and run stages start from.
FROM ubuntu:18.04 AS base
# MAINTAINER is deprecated; record the maintainer as an image label instead.
LABEL maintainer="vgteam"

# Record the stage name in /stage.txt.
RUN echo base > /stage.txt

# Make /vg the working directory for this stage and every stage derived from it.
WORKDIR /vg
# Debug listing of /vg; the fallback echo keeps the build going if ls fails.
RUN ls -lah /vg || echo "No vg directory exists yet"
# Build stage: compiles vg from the source tree supplied as the build context.
FROM base AS build
# Record the stage name in /stage.txt.
RUN echo build > /stage.txt

# Debug listing of /vg before the sources are copied in.
RUN ls -lah /vg || echo "No vg directory exists yet"
# Copy vg build tree into place
COPY . /vg
# Debug listing of /vg after the sources are copied in.
RUN ls -lah /vg || echo "No vg directory exists yet"

# Install the base packages needed to let vg install packages.
# Make sure this runs after vg sources are imported so vg will always have an
# up to date package index to get its dependencies.
# We don't need to clean the package index since we don't ship this image and
# don't care about its size.
# We don't want to install too much stuff here, because we want to test vg's
# make get-deps to make sure it isn't missing something
RUN apt-get -qq -y update && \
    apt-get -qq -y upgrade && \
    apt-get -qq -y install \
    make \
    sudo

# To increase portability of the docker image, set the target CPU architecture to
# Nehalem (2008) rather than auto-detecting the build machine's CPU.
# This has no AVX1, AVX2, or PCLMUL, but it does have SSE4.2.
# UCSC has a Nehalem machine that we want to support.
RUN sed -i s/march=native/march=nehalem/ deps/sdsl-lite/CMakeLists.txt
# In one layer: run `make get-deps`, source source_me.sh, dump the environment
# into the build log (`env`), make the include/vg_git_version.hpp target, build
# vg with -march=nehalem in CXXFLAGS, build the `static` target, and strip the
# resulting binary.
RUN make get-deps && . ./source_me.sh && env && make include/vg_git_version.hpp && CXXFLAGS=" -march=nehalem " make -j$(nproc) && make static && strip bin/vg

# Put the freshly built vg on the PATH.
# Uses the key=value form of ENV; the whitespace-separated form is legacy syntax.
ENV PATH=/vg/bin:$PATH
############################################################################################
# Test stage: starts from the build stage, adds test-only tools, and runs the tests.
FROM build AS test
# Record the stage name in /stage.txt.
RUN echo test > /stage.txt
# The tests need BWA; copy the bwa binary out of a pinned BWA image.
COPY --from=quay.io/ucsc_cgl/bwa:0.7.15--a17c6544342330f6ea7a23a37d23273ab1c52d21 /usr/local/bin/bwa /usr/local/bin/bwa
# The tests need some extra packages.
# TODO: Which of these can we remove?
# No clean necessary since we aren't shipping this
RUN apt-get -qq -y update && \
apt-get -qq -y upgrade && \
apt-get -qq -y install \
pigz \
dstat \
pv \
jq \
samtools \
tabix \
parallel \
bsdmainutils \
rs \
fontconfig-config
# Fail if any non-portable instructions were used.
# The step exits 1 (failing the build) when grep finds a vperm2i128 instruction
# in the disassembly of /vg/bin/vg, and exits 0 otherwise.
RUN /bin/bash -e -c 'if objdump -d /vg/bin/vg | grep vperm2i128 ; then exit 1 ; else exit 0 ; fi'
# Run tests in the middle so the final container that gets tagged is the run container.
RUN make test
############################################################################################
# Run stage: the image that ships, holding only the vg binary, its scripts and
# the runtime tools installed below.
FROM base AS run
# Record the stage name in /stage.txt.
RUN echo run > /stage.txt

# Debug listings of /vg are interleaved with the copies so the build log shows
# what each step added.
RUN ls -lah /vg || echo "No vg directory exists yet"
# Take only the built binary from the build stage, not the whole build tree.
COPY --from=build /vg/bin/vg /vg/bin/
RUN ls -lah /vg || echo "No vg directory exists yet"
COPY --from=build /vg/scripts/* /vg/scripts/
RUN ls -lah /vg || echo "No vg directory exists yet"

# Install packages which toil-vg needs to be available inside the image, for pipes
# TODO: which of these can be removed?
# Make sure to clean so we don't ship old apt package indexes in our Docker.
# `apt-get clean` only empties the downloaded-package cache; the package indexes
# live in /var/lib/apt/lists and must be removed explicitly, in the same layer
# that created them.
RUN ls -lah /vg && \
    apt-get -qq -y update && \
    apt-get -qq -y upgrade && \
    apt-get -qq -y install \
    curl \
    wget \
    pigz \
    dstat \
    pv \
    jq \
    samtools \
    tabix \
    parallel \
    fontconfig-config \
    && apt-get -qq -y clean \
    && rm -rf /var/lib/apt/lists/*

# Put vg on the PATH.
# Uses the key=value form of ENV; the whitespace-separated form is legacy syntax.
ENV PATH=/vg/bin:$PATH
# Dockerfile for shipping just the vg binary you have
# Run with DOCKER_BUILDKIT=1 to avoid shipping the whole vg directory as context
FROM ubuntu:18.04
# MAINTAINER is deprecated; record the maintainer as an image label instead.
LABEL maintainer="vgteam"

WORKDIR /vg
# key=value form of ENV; the whitespace-separated form is legacy syntax.
ENV PATH=/vg/bin:$PATH
# Exec form, so that arguments given to `docker run <image> ...` are passed to vg.
# The shell form wraps the command in `/bin/sh -c` and discards those arguments.
ENTRYPOINT ["/vg/bin/vg"]

# Prevent dpkg from trying to ask any questions, ever
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN=true

# Install dependencies for scripts
RUN apt-get -qq -y update && \
    apt-get -qq -y upgrade && \
    apt-get -qq -y install \
    numactl \
    python3-matplotlib \
    python3-numpy \
    awscli \
    bwa \
    jq \
    bc \
    linux-tools-common \
    linux-tools-generic \
    binutils \
    perl \
    && apt-get -qq -y clean

# Copy in the FlameGraph dependency, the scripts, and the prebuilt vg binary
# from the build context.
COPY deps/FlameGraph /vg/deps/FlameGraph
COPY scripts /vg/scripts
COPY bin/vg /vg/bin/vg
Please describe:
1. What you were trying to do
2. What you wanted to happen
3. What actually happened
4. What data and command line to use to make the problem recur, if applicable
```
Format code blocks or terminal copy-pastes like this, between triple backticks.
```
This diff is collapsed.
# vg
[![Join the chat at https://gitter.im/vgteam/vg](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/vgteam/vg?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Latest Release](https://img.shields.io/github/release/vgteam/vg.svg)](https://github.com/vgteam/vg/releases/latest) [![Build Status](https://travis-ci.org/vgteam/vg.svg?branch=master)](https://travis-ci.org/vgteam/vg) [![Performance Report](https://img.shields.io/badge/performance-report-brightgreen.svg)](https://vg-data.s3.amazonaws.com/vg_ci/vgci_reports/branch/master/index.html) [![Stories in Ready](https://badge.waffle.io/vgteam/vg.png?label=ready&title=Ready)](https://waffle.io/vgteam/vg)
[![Join the chat at https://gitter.im/vgteam/vg](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/vgteam/vg?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Latest Release](https://img.shields.io/github/release/vgteam/vg.svg)](https://github.com/vgteam/vg/releases/latest) [![Build Status](https://travis-ci.org/vgteam/vg.svg?branch=master)](https://travis-ci.org/vgteam/vg) [![Performance Report](https://img.shields.io/badge/performance-report-brightgreen.svg)](https://vg-data.s3.amazonaws.com/vg_ci/vgci_reports/branch/master/index.html)
[![Doxygen API Documentation](https://img.shields.io/badge/doxygen-docs-brightgreen.svg)](https://vgteam.github.io/vg/)
## variation graph data structures, interchange formats, alignment, genotyping, and variant calling methods
......@@ -13,10 +13,20 @@ _Variation graphs_ provide a succinct encoding of the sequences of many genomes.
* _edges_, which connect two nodes via either of their respective ends
* _paths_, describe genomes, sequence alignments, and annotations (such as gene models and transcripts) as walks through nodes connected by edges
This model is similar to a number of sequence graphs that have been used in assembly and multiple sequence alignment. Paths provide coordinate systems relative to genomes encoded in the graph, allowing stable mappings to be produced even if the structure of the graph is changed.
This model is similar to sequence graphs that have been used in assembly and multiple sequence alignment.
Paths provide coordinate systems relative to genomes encoded in the graph, allowing stable mappings to be produced even if the structure of the graph is changed.
The variation graph model makes this embedding explicit and essential.
Tools in vg maintain paths as immutable during transformations of the graph.
They use paths to project graph-relative data into reference-relative coordinate spaces.
Paths provide stable coordinates for graphs built in different ways from the same input sequences.
![example variation graph](https://raw.githubusercontent.com/vgteam/vg/master/doc/figures/smallgraph.png)
## Support
We maintain a support forum on biostars: https://www.biostars.org/t/vg/
## Installation
### Download Releases
......@@ -44,15 +54,18 @@ Then, install VG's dependencies. You'll need the protobuf and jansson developmen
On other distros, you will need to perform the equivalent of:
sudo apt-get install build-essential git cmake pkg-config libncurses-dev libbz2-dev \
protobuf-compiler libprotoc-dev libjansson-dev automake libtool \
jq bc rs curl unzip redland-utils librdf-dev bison flex gawk \
lzma-dev liblzma-dev liblz4-dev libffi-dev libcairo-dev
protobuf-compiler libprotoc-dev libprotobuf-dev libjansson-dev \
automake libtool jq bc rs curl unzip redland-utils \
librdf-dev bison flex gawk lzma-dev liblzma-dev liblz4-dev \
libffi-dev libcairo-dev
Note that **Ubuntu 16.04** does not ship a sufficiently new Protobuf; vg requires **Protobuf 3** which will have to be manually installed.
At present, you will need GCC version 4.9 or greater to compile vg. (Check your version with `gcc --version`.)
Other libraries may be required. Please report any build difficulties.
Note that a 64-bit OS is required. Ubuntu 16.04 should work. You will also need a CPU that supports SSE 4.2 to run VG; you can check this with `cat /proc/cpuinfo | grep sse4_2`.
Note that a 64-bit OS is required. Ubuntu 18.04 should work. You will also need a CPU that supports SSE 4.2 to run VG; you can check this with `cat /proc/cpuinfo | grep sse4_2`.
When you are ready, build with `. ./source_me.sh && make`, and run with `./bin/vg`.
......@@ -75,7 +88,7 @@ VG depends on a number of packages being installed on the system where it is bei
You can use MacPorts to install VG's dependencies:
sudo port install libtool jansson jq cmake pkgconfig autoconf automake libtool coreutils samtools redland bison gperftools md5sha1sum rasqal gmake autogen cairo libomp
sudo port install libtool protobuf3-cpp jansson jq cmake pkgconfig autoconf automake libtool coreutils samtools redland bison gperftools md5sha1sum rasqal gmake autogen cairo libomp
##### Using Homebrew
......@@ -144,6 +157,8 @@ The simplest thing to do with `vg` is to build a graph and align to it. At prese
vg construct -r small/x.fa -v small/x.vcf.gz >x.vg
```
Note that to build a graph, an index of the VCF file is required. The VCF index file can be generated using the `tabix` command provided by SAMtools (e.g. `tabix -p vcf x.vcf.gz` on the command line).
### Viewing, conversion
`vg view` provides a way to convert the graph into various formats:
......@@ -180,7 +195,7 @@ Most commands allow the streaming of graphs into and out of `vg`.
If your graph is large, you want to use `vg index` to store the graph and `vg map` to align reads. `vg map` implements a kmer based seed and extend alignment model that is similar to that used in aligners like novoalign or MOSAIK. First an on-disk index is built with `vg index` which includes the graph itself and kmers of a particular size. When mapping, any kmer size shorter than that used in the index can be employed, and by default the mapper will decrease the kmer size to increase sensitivity when alignment at a particular _k_ fails.
```sh
# construct the graph
# construct the graph (paths below assume running from `vg/test` directory)
vg construct -r small/x.fa -v small/x.vcf.gz > x.vg
# store the graph in the xg/gcsa index pair
......@@ -208,40 +223,99 @@ vg map -T x.sim.txt -x x.xg -g x.gcsa --surject-to bam > aln.bam
Variation from alignments can be embedded back into the graph. This process is called augmentation and is important for variant calling, for example (see below).
```sh
# augment the graph with all variation from the GAM, saving to aug.vg. aug.gam contains the same reads as aln.gam but mapped to aug.vg
# augment the graph with all variation from the GAM except that implied by soft clips, saving to aug.vg. aug.gam contains the same reads as aln.gam but mapped to aug.vg
vg augment x.vg aln.gam -A aug.gam > aug.vg
# augment the graph with all variation from the GAM, saving each mapping as a path in the graph.
# softclips of alignment paths are preserved (`-S`).
# Note, this can be much less efficient than the above example if there are many alignments in the GAM
vg augment x.vg aln.gam -i > aug_with_paths.vg
vg augment x.vg aln.gam -i -S > aug_with_paths.vg
```
### Variant Calling
The following example shows how to construct a VCF file from a read alignment and graph. Input must be split into chunks (see vg chunk) in order to run on whole genome.
#### Calling variants using read support
The following examples show how to generate a VCF with vg using read support. They depend on output from the Mapping and Augmentation examples above. Small variants and SVs can be called using the same approach. Currently, it is more accurate for SVs.
Call only variants that are present in the graph:
```sh
# Compute the read support from the gam (ignoring mapping and base quality < 5)
vg pack -x x.xg -g aln.gam -Q 5 -o aln.pack
# Generate a VCF from the support
vg call x.xg -k aln.pack > graph_calls.vcf
```
In order to also consider *novel* variants from the reads, use the augmented graph and gam (as created in the previous example using `vg augment -A`):
```sh
# Index our augmented graph
vg index aug.vg -x aug.xg
# Compute the read support from the augmented gam (ignoring quality < 5)
vg pack -x aug.xg -g aug.gam -Q 5 -o aln_aug.pack
# Generate a VCF from the support
vg call aug.xg -k aln_aug.pack > calls.vcf
```
A similar process can be used to *genotype* known variants from a VCF. To do this, the graph must be constructed from the VCF with `vg construct -a`:
```sh
# Re-construct the same graph as before but with `-a`
vg construct -r small/x.fa -v small/x.vcf.gz -a > xa.vg
# Index the graph with `-L` to preserve alt paths in the xg
vg index xa.vg -x xa.xg -L
# Compute the support (we could also reuse aln.pack from above)
vg pack -x xa.xg -g aln.gam -o aln.pack
# Genotype the VCF
vg call xa.xg -k aln.pack -v small/x.vcf.gz > genotypes.vcf
```
Pre-filtering the GAM before computing support can improve precision of SNP calling
```sh
# filter secondary and ambiguous read mappings out of the gam
vg filter alignment.gam -r 0.90 -fu -s 2 -o 0 -D 999 -x graph.xg > filtered.gam
vg filter aln.gam -r 0.90 -fu -m 1 -q 15 -D 999 -x x.xg > aln.filtered.gam
# create an augmented graph by adding variation from the reads
vg augment graph.vg filtered.gam -a pileup -S aug_graph.support -Z aug_graph.trans > aug_graph.vg
# then compute the support from aln.filtered.gam instead of aln.gam in above etc.
```
For larger graphs, it is recommended to compute snarls separately:
```sh
vg snarls x.xg > x.snarls
# load snarls from a file instead of computing on the fly
vg call x.xg -k aln.pack -r x.snarls > calls.vcf
```
# to only recall variants that are already in the graph, add -g 9999999 to the augment options above.
Note: `vg augment`, `vg pack`, `vg call` and `vg snarls` can now all be run directly on any graph format (e.g. `.vg`, `.xg` (except `augment`) or anything output by `vg convert`). Operating on `.vg` uses the most memory and is not recommended for large graphs. The output of `vg pack` can only be read in conjunction with the same graph used to create it, so `vg pack x.vg -g aln.gam -o x.pack` then `vg call x.xg -k x.pack` will not work.
# Make calls by thresholding based on read support for graph path SEQ
vg call aug_graph.vg -b graph.vg -s aug_graph.support -z aug_graph.trans -r SEQ > calls.vcf
#### Calling variants from paths in the graph
Infer variants from alignments implied by paths in the graph. This can be used, for example, to call SVs directly from a variation graph that was constructed from a multiple alignment of different assemblies:
```sh
# create a graph from a multiple alignment of HLA haplotypes (from vg/test directory)
vg msga -f GRCh38_alts/FASTA/HLA/V-352962.fa -t 1 -k 16 | vg mod -U 10 - | vg mod -c - > hla.vg
# Or Make calls using a Freebayes-like genotyping algorithm for graph path SEQ
vg genotype graph.vg -G alignment.gam -E -v -r SEQ > calls.vcf
# index it
vg index hla.vg -x hla.xg
# for comparison purposes, it's very useful to normalize the vcf output, especially for more complex graphs which can make large variant blocks that contain a lot of reference bases (Note: requires [vt](http://genome.sph.umich.edu/wiki/Vt)):
vt decompose_blocksub -a calls.vcf | vt normalize -r FASTA_FILE - > calls.clean.vcf
# generate a VCF using gi|568815592:29791752-29792749 as the reference contig. The other paths will be considered as haploid samples
vg deconstruct hla.xg -e -p "gi|568815592:29791752-29792749" > hla_variants.vcf
```
Variants can also be inferred strictly from topology by not using `-e`, though unlike the above example, cycles are not supported. "Deconstruct" the VCF variants that were used to construct the graph. The output will be similar but not identical to `small/x.vcf.gz` as `vg construct` can add edges between adjacent alts and/or do some normalization:
```sh
# using the same graph from the `map` example
vg deconstruct x.xg > x.vcf
```
To produce a VCF file for a whole chromosome, the graph must be cut up along the reference genome and called in chunks. `scripts/chunked_call` wraps this functionality to produce chromosome-sized VCFs in a single command line (from a GAM file and XG index)
As with `vg call`, it is best to compute snarls separately and pass them in with `-r` when working with large graphs.
### Command line interface
......
vg (1.18.0+ds2-1) UNRELEASED; urgency=medium
vg (1.21.0+ds-1) UNRELEASED; urgency=medium
* Initial release. (Closes: #939537)
-- Michael R. Crusoe <michael.crusoe@gmail.com> Tue, 11 Dec 2018 00:22:35 -0800
-- Michael R. Crusoe <michael.crusoe@gmail.com> Sun, 12 Jan 2020 08:30:00 +0100
......@@ -60,7 +60,7 @@ Build-Depends: debhelper-compat (= 12),
# libhts-dev
# libsdsl-dev,
# libvw-dev,
Standards-Version: 4.4.0
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/vg
Vcs-Git: https://salsa.debian.org/med-team/vg.git
Homepage: https://github.com/vgteam/vg#vg
......
This diff is collapsed.
Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: Due to other patches, snappy is no longer a dependency
--- vg.orig/Makefile
+++ vg/Makefile
@@ -174,7 +174,6 @@
--- a/Makefile
+++ b/Makefile
@@ -206,7 +206,6 @@ CONFIGURATION_OBJ =
LOCKFREE_MALLOC_DIR:=deps/lockfree-malloc
SDSL_DIR:=deps/sdsl-lite
-SNAPPY_DIR:=deps/snappy
GCSA2_DIR:=deps/gcsa2
GBWT_DIR:=deps/gbwt
PROGRESS_BAR_DIR:=deps/progress_bar
@@ -211,7 +210,6 @@
GBWTGRAPH_DIR=deps/gbwtgraph
@@ -248,7 +247,6 @@ DEP_OBJ += $(OBJ_DIR)/Fasta.o
LIB_DEPS =
LIB_DEPS += $(LIB_DIR)/libsdsl.a
LIB_DEPS += $(LIB_DIR)/libssw.a
-LIB_DEPS += $(LIB_DIR)/libsnappy.a
LIB_DEPS += $(LIB_DIR)/libgcsa2.a
LIB_DEPS += $(LIB_DIR)/libgbwt.a
LIB_DEPS += $(LIB_DIR)/libhts.a
@@ -300,9 +298,6 @@
LIB_DEPS += $(LIB_DIR)/libgbwtgraph.a
@@ -366,11 +364,6 @@ endif
$(LIB_DIR)/libssw.a: $(SSW_DIR)/*.c $(SSW_DIR)/*.h
+. ./source_me.sh && cd $(SSW_DIR) && $(MAKE) $(FILTER) && ar rs $(CWD)/$(LIB_DIR)/libssw.a ssw.o ssw_cpp.o && cp ssw_cpp.h ssw.h $(CWD)/$(LIB_DIR)
-# We need to hide -Xpreprocessor -fopenmp from Snappy, at least on Mac, because
-# it will drop the -Xpreprocessor and keep the -fopenmp and upset Clang.
-$(LIB_DIR)/libsnappy.a: $(SNAPPY_DIR)/*.cc $(SNAPPY_DIR)/*.h
- +. ./source_me.sh && cd $(SNAPPY_DIR) && ./autogen.sh && ./configure --prefix=$(CWD) $(FILTER) && $(MAKE) libsnappy.la $(FILTER) && cp .libs/libsnappy.a $(CWD)/lib/ && cp snappy-c.h snappy-sinksource.h snappy-stubs-public.h snappy.h $(CWD)/include/
- +. ./source_me.sh && cd $(SNAPPY_DIR) && ./autogen.sh && CXXFLAGS="$(filter-out -Xpreprocessor -fopenmp,$(CXXFLAGS))" ./configure --prefix=$(CWD) $(FILTER) && CXXFLAGS="$(filter-out -Xpreprocessor -fopenmp,$(CXXFLAGS))" $(MAKE) libsnappy.la $(FILTER) && cp .libs/libsnappy.a $(CWD)/lib/ && cp snappy-c.h snappy-sinksource.h snappy-stubs-public.h snappy.h $(CWD)/include/
-
$(INC_DIR)/gcsa/gcsa.h: $(LIB_DIR)/libgcsa2.a
$(LIB_DIR)/libgcsa2.a: $(LIB_DIR)/libsdsl.a $(wildcard $(GCSA2_DIR)/*.cpp) $(wildcard $(GCSA2_DIR)/include/gcsa/*.h)
$(LIB_DIR)/libgcsa2.a: $(LIB_DIR)/libsdsl.a $(LIB_DIR)/libdivsufsort.a $(LIB_DIR)/libdivsufsort64.a $(wildcard $(GCSA2_DIR)/*.cpp) $(wildcard $(GCSA2_DIR)/include/gcsa/*.h)
Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: Ensure that we don't accidentally include profile mode
--- vg.orig/deps/gfakluge/Makefile
+++ vg/deps/gfakluge/Makefile
@@ -1,5 +1,5 @@
CXX?=g++
-CXXFLAGS:=-O3 -pipe -fPIC -march=native -mtune=native -Wall -std=c++11 -pg -ggdb
+CXXFLAGS:=-O3 -pipe -fPIC -march=native -mtune=native -Wall -std=c++11 -ggdb
# We want to pass -Wa,-q to GCC use the Clang assembler, but Apple Clang can't take that
# So we do an environment variable instead
export AS_INTEGRATED_ASSEMBLER=1
--- vg.orig/deps/gfakluge/makefile.detuned
+++ vg/deps/gfakluge/makefile.detuned
--- a/deps/gfakluge/makefile.detuned
+++ b/deps/gfakluge/makefile.detuned
@@ -1,5 +1,5 @@
CXX?=g++
-CXXFLAGS:=-O0 -pipe -std=c++11 -pg -ggdb
......@@ -18,9 +9,9 @@ Description: Ensure that we don't accidentally include profile mode
BIN_DIR:=bin
--- vg.orig/deps/sonLib/include.mk
+++ vg/deps/sonLib/include.mk
@@ -54,7 +54,7 @@
--- a/deps/sonLib/include.mk
+++ b/deps/sonLib/include.mk
@@ -54,7 +54,7 @@ cflags_ultraDbg = -Wall -Werror --pedant
cppflags_ultraDbg = -g -O1 -fno-inline -fno-omit-frame-pointer -fsanitize=address
#Profile flags
......@@ -29,9 +20,9 @@ Description: Ensure that we don't accidentally include profile mode
#Flags to use
ifndef CGL_DEBUG
--- vg.orig/deps/vcflib/Makefile
+++ vg/deps/vcflib/Makefile
@@ -158,7 +158,7 @@
--- a/deps/vcflib/Makefile
+++ b/deps/vcflib/Makefile
@@ -158,7 +158,7 @@ profiling:
$(MAKE) CXXFLAGS="$(CXXFLAGS) -g" all
gprof:
......@@ -40,9 +31,9 @@ Description: Ensure that we don't accidentally include profile mode
$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP) $(FASTAHACK)
$(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && cp src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
--- vg.orig/deps/vowpal_wabbit/Makefile
+++ vg/deps/vowpal_wabbit/Makefile
@@ -103,7 +103,7 @@
--- a/deps/vowpal_wabbit/Makefile
+++ b/deps/vowpal_wabbit/Makefile
@@ -103,7 +103,7 @@ vw:
cd vowpalwabbit; $(MAKE) -j $(NPROCS) things
#Target-specific flags for a profiling build. (Copied from line 70)
......@@ -51,7 +42,7 @@ Description: Ensure that we don't accidentally include profile mode
vw_gcov: CXX = g++
vw_gcov:
cd vowpalwabbit && env LDFLAGS="-fprofile-arcs -ftest-coverage -lgcov"; $(MAKE) -j $(NPROCS) things
@@ -115,7 +115,7 @@
@@ -115,7 +115,7 @@ library_example: vw
cd library; $(MAKE) -j $(NPROCS) things
#Target-specific flags for a profiling build. (Copied from line 70)
......
Author: Michael R. Crusoe <michael.crusoe@gmail.com>
Description: don't use rpath
--- vg.orig/Makefile
+++ vg/Makefile
@@ -100,8 +100,6 @@
--- a/Makefile
+++ b/Makefile
@@ -128,8 +128,6 @@ ifeq ($(shell uname -s),Darwin)
else
# We are not running on OS X
......
......@@ -4,7 +4,6 @@ use_packaged_jemalloc
use_packaged_libbackward-cpp
use_packaged_rocksdb
use_packaged_elfutils
use_packaged_protobuf
use_packaged_raptor2
no_need_for_snappy
use_packaged_sparsehash
......