Commit 42313f68 authored by Andreas Tille's avatar Andreas Tille

Imported Upstream version 1.0.0~rc1

parent d97fda1a
......@@ -116,3 +116,7 @@ bin/vcfleftalign
bin/vcfqual2info
bin/vcfsample2info
libvcflib.a
lib/
bin/
obj/
include/
language: cpp
sudo: required
compiler:
- gcc
before_install:
- sudo apt-get install g++
- sudo apt-get install build-essential
- git submodule update --init --recursive
os:
- linux
script: make
\ No newline at end of file
This diff is collapsed.
import argparse, csv, os, sys
parser=argparse.ArgumentParser(description="Determines the index of individuals\
of a given ethnicity within a 1000 Genomes VCF")
parser.add_argument("VCF", type=str, help="VCF of 1000 Genomes individuals")
parser.add_argument("Population", type=str, help="1KG identifier for population\
to be found in the index, enter \"ALL|\" to print index for all populations in \
the VCF")
arg=parser.parse_args()
a=os.path.abspath(sys.argv[0]).split("/")[:-1]
b="/".join(a)
try:
open(b+"/1KG_IDs.txt")
except IOError:
print "Missing file \"1KG_IDs.txt\" containing the IDs for 1000 Genomes"
popdict={}
with open(b+"/1KG_IDs.txt") as t:
for line in csv.reader(t,delimiter='\t'):
popdict[line[1]]=line[0]
with open(arg.VCF) as t:
for line in csv.reader(t,delimiter='\t'):
if line[0]=='#CHROM':
for j in range(len(line)):
if 'NA' in line[j] or 'HG' in line[j]:
vcfind=line[j:]
break
indict={}
for i in range(len(vcfind)):
try:
eth=popdict[vcfind[i]]
except:
print"Non 1000 Genomes individuals found in VCF"
if eth in indict.keys():
indict[eth].append(i)
else:
indict[eth]=[i]
if arg.Population=="All":
for j in indict.keys():
l=[str(i) for i in indict[j]]
print j,"=",",".join(l)
else:
try:
indict[arg.Population]
except IOError:
print "Population not in VCF or invalid population ID"
l=[str(i) for i in indict[arg.Population]]
print ",".join(l)
This is an accessory script to find the index of 1000 Genome individuals for a specific population within a 1000 Genomes derived VCF.
\ No newline at end of file
#OBJ_DIR = ./
HEADERS = src/Variant.h \
src/split.h \
src/pdflib.hpp \
src/var.hpp \
src/cdflib.hpp \
src/rnglib.hpp \
src/join.h
SOURCES = src/Variant.cpp \
src/rnglib.cpp \
src/var.cpp \
src/pdflib.cpp \
src/cdflib.cpp \
src/split.cpp
OBJECTS= $(SOURCES:.cpp=.o)
VCF_LIB_LOCAL:=$(shell pwd)
BIN_DIR:=bin
LIB_DIR:=lib
SRC_DIR=src
INC_DIR:=include
OBJ_DIR:=obj
# TODO
#vcfstats.cpp
BIN_SOURCES = src/vcfecho.cpp \
src/dumpContigsFromHeader.cpp \
src/bFst.cpp \
src/hapLrt.cpp \
src/popStats.cpp \
src/wcFst.cpp \
src/iHS.cpp \
src/segmentFst.cpp \
src/segmentIhs.cpp \
src/genotypeSummary.cpp \
src/sequenceDiversity.cpp \
src/pFst.cpp \
src/smoother.cpp \
src/LD.cpp \
src/plotHaps.cpp \
src/abba-baba.cpp \
src/permuteGPAT++.cpp \
src/permuteSmooth.cpp \
src/normalize-iHS.cpp \
src/meltEHH.cpp \
src/vcfaltcount.cpp \
src/vcfhetcount.cpp \
src/vcfhethomratio.cpp \
......@@ -82,7 +116,7 @@ SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=))
TABIX = tabixpp/tabix.o
FASTAHACK = fastahack/Fasta.o
SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o
SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o
REPEATS = smithwaterman/Repeats.o
INDELALLELE = smithwaterman/IndelAllele.o
DISORDER = smithwaterman/disorder.o
......@@ -90,14 +124,17 @@ LEFTALIGN = smithwaterman/LeftAlign.o
FSOM = fsom/fsom.o
FILEVERCMP = filevercmp/filevercmp.o
INCLUDES = -I. -Itabixpp/htslib/ -L. -Ltabixpp/ -Ltabixpp/htslib/
LDFLAGS = -lvcflib -ltabix -lhts -lpthread -lz -lm
INCLUDES = -Itabixpp/htslib -I$(INC_DIR) -L. -Ltabixpp/htslib
LDFLAGS = -L$(LIB_DIR) -lvcflib -lhts -lpthread -lz -lm -fopenmp
all: $(OBJECTS) $(BINS)
GIT_VERSION := $(shell git describe --abbrev=4 --dirty --always)
CXX = g++
CXXFLAGS = -O3 -D_FILE_OFFSET_BITS=64
CXXFLAGS = -O3 -D_FILE_OFFSET_BITS=64 -std=c++0x -DVERSION=\"$(GIT_VERSION)\"
#CXXFLAGS = -O2
#CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual
......@@ -115,14 +152,20 @@ profiling:
gprof:
$(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all
$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX)
$(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS)
$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP)
$(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && cp src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
multichoose: pre
cd multichoose && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
intervaltree: pre
cd intervaltree && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
$(TABIX):
cd tabixpp && $(MAKE)
$(TABIX): pre
cd tabixpp && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/
$(SMITHWATERMAN):
cd smithwaterman && $(MAKE)
$(SMITHWATERMAN): pre
cd smithwaterman && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/
$(DISORDER): $(SMITHWATERMAN)
......@@ -132,34 +175,51 @@ $(LEFTALIGN): $(SMITHWATERMAN)
$(INDELALLELE): $(SMITHWATERMAN)
$(FASTAHACK):
cd fastahack && $(MAKE)
$(FASTAHACK): pre
cd fastahack && $(MAKE) && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/
#$(FSOM):
# cd fsom && $(CXX) $(CXXFLAGS) -c fsom.c -lm
$(FILEVERCMP):
cd filevercmp && make
$(FILEVERCMP): pre
cd filevercmp && make && cp *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && cp *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/
$(SHORTBINS):
$(SHORTBINS): pre
$(MAKE) bin/$@
$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP)
$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre intervaltree
$(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS)
libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX)
libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX) pre
ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) $(TABIX)
cp libvcflib.a $(LIB_DIR)
test: $(BINS)
@prove -Itests/lib -w tests/*.t
pre:
if [ ! -d $(BIN_DIR) ]; then mkdir -p $(BIN_DIR); fi
if [ ! -d $(LIB_DIR) ]; then mkdir -p $(LIB_DIR); fi
if [ ! -d $(INC_DIR) ]; then mkdir -p $(INC_DIR); fi
if [ ! -d $(OBJ_DIR) ]; then mkdir -p $(OBJ_DIR); fi
pull:
git pull
update: pull all
clean:
rm -f $(BINS) $(OBJECTS)
rm -f ssw_cpp.o ssw.o
rm -f libvcflib.a
rm -rf $(BIN_DIR)
rm -rf $(LIB_DIR)
rm -rf $(INC_DIR)
rm -rf $(OBJ_DIR)
cd tabixpp && make clean
cd smithwaterman && make clean
cd fastahack && make clean
.PHONY: clean all test
.PHONY: clean all test pre
#OBJ_DIR = ./
HEADERS = src/Variant.h \
src/split.h \
src/join.h
SOURCES = src/Variant.cpp \
src/split.cpp
OBJECTS= $(SOURCES:.cpp=.o)
# TODO
#vcfstats.cpp
BIN_SOURCES = src/vcfecho.cpp \
src/vcfaltcount.cpp \
src/vcfhetcount.cpp \
src/vcfhethomratio.cpp \
src/vcffilter.cpp \
src/vcf2tsv.cpp \
src/vcfgenotypes.cpp \
src/vcfannotategenotypes.cpp \
src/vcfcommonsamples.cpp \
src/vcfremovesamples.cpp \
src/vcfkeepsamples.cpp \
src/vcfsamplenames.cpp \
src/vcfgenotypecompare.cpp \
src/vcffixup.cpp \
src/vcfclassify.cpp \
src/vcfsamplediff.cpp \
src/vcfremoveaberrantgenotypes.cpp \
src/vcfrandom.cpp \
src/vcfparsealts.cpp \
src/vcfstats.cpp \
src/vcfflatten.cpp \
src/vcfprimers.cpp \
src/vcfnumalt.cpp \
src/vcfcleancomplex.cpp \
src/vcfintersect.cpp \
src/vcfannotate.cpp \
src/vcfallelicprimitives.cpp \
src/vcfoverlay.cpp \
src/vcfaddinfo.cpp \
src/vcfkeepinfo.cpp \
src/vcfkeepgeno.cpp \
src/vcfafpath.cpp \
src/vcfcountalleles.cpp \
src/vcflength.cpp \
src/vcfdistance.cpp \
src/vcfrandomsample.cpp \
src/vcfentropy.cpp \
src/vcfglxgt.cpp \
src/vcfroc.cpp \
src/vcfcheck.cpp \
src/vcfstreamsort.cpp \
src/vcfuniq.cpp \
src/vcfuniqalleles.cpp \
src/vcfremap.cpp \
src/vcf2fasta.cpp \
src/vcfsitesummarize.cpp \
src/vcfbreakmulti.cpp \
src/vcfcreatemulti.cpp \
src/vcfevenregions.cpp \
src/vcfcat.cpp \
src/vcfgenosummarize.cpp \
src/vcfgenosamplenames.cpp \
src/vcfgeno2haplo.cpp \
src/vcfleftalign.cpp \
src/vcfcombine.cpp \
src/vcfgeno2alleles.cpp \
src/vcfindex.cpp \
src/vcf2dag.cpp \
src/vcfsample2info.cpp \
src/vcfqual2info.cpp \
src/vcfinfo2qual.cpp \
src/vcfglbound.cpp \
src/vcfinfosummarize.cpp
# when we can figure out how to build on mac
# src/vcfsom.cpp
#BINS = $(BIN_SOURCES:.cpp=)
BINS = $(addprefix bin/,$(notdir $(BIN_SOURCES:.cpp=)))
SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=))
TABIX = tabixpp/tabix.o
FASTAHACK = fastahack/Fasta.o
SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o
REPEATS = smithwaterman/Repeats.o
INDELALLELE = smithwaterman/IndelAllele.o
DISORDER = smithwaterman/disorder.o
LEFTALIGN = smithwaterman/LeftAlign.o
FSOM = fsom/fsom.o
FILEVERCMP = filevercmp/filevercmp.o
INCLUDES = -I. -Itabixpp/htslib/ -L. -Ltabixpp/ -Ltabixpp/htslib/
LDFLAGS = -lvcflib -lhts -lpthread -lz -lm
all: $(OBJECTS) $(BINS)
CXX = g++
CXXFLAGS = -O3 -D_FILE_OFFSET_BITS=64
#CXXFLAGS = -O2
#CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual
SSW = src/ssw.o src/ssw_cpp.o
ssw.o: src/ssw.h
ssw_cpp.o:src/ssw_cpp.h
openmp:
$(MAKE) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP"
profiling:
$(MAKE) CXXFLAGS="$(CXXFLAGS) -g" all
gprof:
$(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all
$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX)
$(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS)
$(TABIX):
cd tabixpp && $(MAKE)
$(SMITHWATERMAN):
cd smithwaterman && $(MAKE)
$(DISORDER): $(SMITHWATERMAN)
$(REPEATS): $(SMITHWATERMAN)
$(LEFTALIGN): $(SMITHWATERMAN)
$(INDELALLELE): $(SMITHWATERMAN)
$(FASTAHACK):
cd fastahack && $(MAKE)
#$(FSOM):
# cd fsom && $(CXX) $(CXXFLAGS) -c fsom.c -lm
$(FILEVERCMP):
cd filevercmp && make
$(SHORTBINS):
$(MAKE) bin/$@
$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP)
$(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS)
libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX)
ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) $(TABIX)
test: $(BINS)
@prove -Itests/lib -w tests/*.t
clean:
rm -f $(BINS) $(OBJECTS)
rm -f ssw_cpp.o ssw.o
rm -f libvcflib.a
cd tabixpp && make clean
cd smithwaterman && make clean
cd fastahack && make clean
.PHONY: clean all test
This diff is collapsed.
#!/usr/bin/env perl
while (<STDIN>) {
$_ =~ /^(.+?)\s(.+?)\s(.+)\s*/;
$chrom = $1;
$pos = $2;
$end = $3;
print $chrom . ":" . $pos . "-" . $end . "\n";
}
#usage: nohup R --vanilla < plotPfst --args pFst.txt
cmd_args <- commandArgs(trailingOnly = TRUE)
plotPfst<-function(x){
require("ggplot2")
dat<-read.table( x, header=FALSE )
dat$V2 <-dat$V2 / 1e3
theplot<-ggplot(dat, aes(x=V2, y=V9))+geom_point()+geom_segment(aes(x=V2, xend=V2, y=V10, yend=V11))+labs(x="KB position", y="Fst")+theme_grey(15)
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.png")), collapse="_")
ggsave(filename=pngName, width=20, height=4, units="in", theplot)
}
plotPfst(cmd_args)
\ No newline at end of file
#usage: nohup R --vanilla < plotPfst --args pFst.txt
cmd_args <- commandArgs(trailingOnly = TRUE)
plotPfst<-function(x){
require("ggplot2")
dat<-read.table( x, header=FALSE )
dat<-dat[dat$V5 < 0.9,]
theplot<-ggplot(dat, aes(x=V2/1e3, y=-log10(V5)*V6))+geom_point()+theme_grey(15)+labs(x="KB position", y="-log10(hapLRT * sign)")+geom_hline(aes(yintercept=0), linetype="dashed", colour="red")
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.png")), collapse="_")
ggsave(filename=pngName, width=20, height=4, units="in", theplot)
}
plotPfst(cmd_args)
\ No newline at end of file
#usage: nohup R --vanilla < plotPfst --args plotHapOutput.txt
cmd_args <- commandArgs(trailingOnly = TRUE)
imageHap<-function(x){
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.pdf")), collapse="_")
dat<-read.table( x[1], header=FALSE )
pos<-dat[,1]
dat<-dat[,2:length(dat)]
print(head(dat))
hd<-dist(t(dat), method="binary")
or<-hclust(hd)
or$labels<-1:length(dat)
print(or$labels)
pdf(pngName, width=9, height=8)
par(mfrow=c(2,1))
image(1-as.matrix(dat[,or$order]), yaxt="n", xaxt="n", ylab="Haplotypes", xlab="SNP", cex.lab=1.1)
plot(or, main="", cex=1.1, lwd=2, sub="", xlab="")
dev.off()
}
imageHap(cmd_args)
\ No newline at end of file
#usage: nohup R --vanilla < plotPfst --args pFst.txt
cmd_args <- commandArgs(trailingOnly = TRUE)
plotPfst<-function(x){
require("ggplot2")
dat<-read.table( x, header=FALSE )
dat$V2<-1:length(dat$V2)
dat<-dat[dat$V3 < 0.9,]
theplot<-ggplot(dat, aes(x=V2, y=-log10(V3)))+geom_point()+theme_grey(15)+labs(x="SNP index", y="-log10(pFst)")
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.png")), collapse="_")
ggsave(filename=pngName, width=20, height=4, units="in", theplot)
}
plotPfst(cmd_args)
\ No newline at end of file
#usage: nohup R --vanilla < plotPfst --args smoothedpFst.txt wcFst|pFst|abba-baba
cmd_args <- commandArgs(trailingOnly = TRUE)
plotPfst<-function(x){
require("ggplot2")
dat<-read.table( x[1], header=FALSE )
dat$V2<-1:length(dat$V2)
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.png")), collapse="_")
theplot<-NULL
if(x[2] == "pFst"){
theplot<-ggplot(dat, aes(x=V2, y=-log10(V5), colour=V4))+geom_point()+theme_grey(15)+labs(x="SNP index", y="-log10(smoothed pFst)")+scale_colour_continuous(low="grey", high="red", name="variants in window")
}
if(x[2] == "wcFst"){
theplot<-ggplot(dat, aes(x=V2, y=V5, colour=V4))+geom_point()+theme_grey(15)+labs(x="SNP index", y="smoothed wcFst")+scale_colour_continuous(low="grey", high="red", name="variants in window")
}
if(x[2] == "xpEHH"){
theplot<-ggplot(dat, aes(x=V2, y=V5, colour=V4))+geom_point()+theme_grey(15)+labs(x="SNP index", y="smoothed xpEHH")+scale_colour_continuous(low="grey", high="red", name="variants in window")
}
if(x[2] == "abba-baba"){
theplot<-ggplot(dat, aes(x=V3, y=V5, colour=V4))+geom_point()+theme_grey(15)+labs(x="SNP index", y="D-statistic")+scale_colour_continuous(low="grey", high="red", name="variants in window")
}
ggsave(filename=pngName, width=20, height=4, units="in", theplot)
}
plotPfst(cmd_args)
#usage: nohup R --vanilla < plotPfst --args pFst.txt
cmd_args <- commandArgs(trailingOnly = TRUE)
plotPfst<-function(x){
require("ggplot2")
dat<-read.table( x, header=FALSE )
dat$V2<-1:length(dat$V2)
theplot<-ggplot(dat, aes(x=V2, y=V5))+geom_point()+theme_grey(15)+labs(x="SNP index", y="wcFst")+ylim(0,1.1)
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.png")), collapse="_")
ggsave(filename=pngName, width=20, height=4, units="in", theplot)
}
plotPfst(cmd_args)
\ No newline at end of file
#usage: nohup R --vanilla < plotPfst --args pFst.txt
cmd_args <- commandArgs(trailingOnly = TRUE)
plotPfst<-function(x){
require("ggplot2")
dat<-read.table( x, header=FALSE )
dat$V2 <-dat$V2 / 1e3
theplot<-ggplot(dat, aes(x=V2, y=V6))+geom_point()+labs(x="KB position", y="raw XPEHH")+theme_grey(15)
pngName<-paste(c(x, format(Sys.time(), "%a%b%d_%H_%M_%S.png")), collapse="_")
ggsave(filename=pngName, width=20, height=4, units="in", theplot)
}
plotPfst(cmd_args)
\ No newline at end of file
#!/usr/bin/env Rscript
require(plyr)
require(ggplot2)
require(pracma)
require(grid)
argv <- commandArgs(trailingOnly = TRUE)
prefix <- gsub("\\s","", argv[1])
print(prefix)
truthset <- argv[2]
print(truthset)
results <- argv[3]
print(results)
xmin <- as.numeric(argv[4])
xmax <- as.numeric(argv[5])
ymin <- as.numeric(argv[6])
ymax <- as.numeric(argv[7])
roc <- read.delim(results)
bests <- ddply(roc, .(set), function(x) { data.frame(best_snps=with(x, min(false_negative_snps + false_positive_snps)), best_snp_threshold=min(subset(x, (false_negative_snps + false_positive_snps) == with(x, min(false_negative_snps + false_positive_snps)))$threshold ), best_indels=with(x, min(false_negative_indels + false_positive_indels)), best_indel_threshold=min(subset(x, (false_negative_indels + false_positive_indels) == with(x, min(false_negative_indels + false_positive_indels)))$threshold )) })
write.table(bests, paste(prefix, ".bests.tsv", sep=""), row.names=FALSE, quote=FALSE, sep="\t")
#abs(trapz(c(1, roc$complexfpr), c(1, roc$complextpr)))
true_snps <- with(subset(roc, set==truthset), max(num_snps))
true_indels <- with(subset(roc, set==truthset), max(num_indels))
# get ROC AUC
auc <- ddply(roc, .(set),
function(x) {
data.frame(
snp_auc=ifelse(true_snps>0,
with(x,
abs(trapz(c(1,
false_positive_snps/(false_positive_snps+ max(false_negative_snps + num_snps - false_positive_snps))),
c(max(1- false_negative_snps/true_snps),
1- false_negative_snps/true_snps)))),
0),
indel_auc=ifelse(true_indels>0,
with(x,
abs(trapz(c(1,
false_positive_indels/(false_positive_indels+ max(false_negative_indels + num_indels - false_positive_indels))),
c(max(1- false_negative_indels/true_indels),
1- false_negative_indels/true_indels)))),
0)
)
}
)
write.table(auc, paste(prefix, ".auc.tsv", sep=""), row.names=FALSE, quote=FALSE, sep="\t")
rocsnps <- ddply(roc, .(set),
function(x) {
data.frame(
FPR=
with(x,
c(1,
false_positive_snps/(false_positive_snps+ max(false_negative_snps + num_snps - false_positive_snps)))),
TPR=
with(x,
c(max(1- false_negative_snps/true_snps),
1- false_negative_snps/true_snps)),
type=as.factor("snps")
)
}
)
rocindels <- ddply(roc, .(set),
function(x) {
data.frame(
FPR=
with(x,
c(1,
false_positive_indels/(false_positive_indels+ max(false_negative_indels + num_indels - false_positive_indels)))),
TPR=
with(x,
c(max(1- false_negative_indels/true_indels),
1- false_negative_indels/true_indels)),
type=as.factor("indels")
)
}
)
if (FALSE) {
if (true_snps>0) {
ggplot(subset(roc, set != truthset),
aes(false_positive_snps/(false_positive_snps+with(subset(roc, set==set), max(false_negative_snps + num_snps - false_positive_snps))),
1- false_negative_snps/with(subset(roc, set==set), max(false_negative_snps + num_snps - false_positive_snps)),
group=set,
color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw()
+ coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
ggsave(paste(prefix, ".snps.png", sep=""), height=6, width=9)
}
if (true_indels>0) {
ggplot(subset(roc, set != truthset),
aes(false_positive_indels/(false_positive_indels+with(subset(roc, set==set), max(false_negative_indels + num_indels - false_positive_indels))),
1- false_negative_indels/with(subset(roc, set==set), max(false_negative_indels + num_indels - false_positive_indels)),
group=set,
color=set)) + scale_x_continuous("false positive rate") + scale_y_continuous("true positive rate") + geom_path() + theme_bw()
+ coord_cartesian(xlim=c(xmin,xmax), ylim=c(ymin,ymax))
ggsave(paste(prefix, ".indels.png", sep=""), height=6, width=9)
}