Skip to content
Commits on Source (7)
......@@ -34,7 +34,7 @@ General help about the building process's configuration step can be acquired via
make
make install
```
You many need `sudo` permissions to run `make install`.
You may need `sudo` permissions to run `make install`.
### Build from GitHub
......
......@@ -18,9 +18,9 @@ AC_PROG_CPP
# Checks for perl.
AC_PATH_PROGS([PERL], [perl] [perl5], [false])
if test "x$PERL" = xfalse ; then
AS_IF([test "x$PERL" = "xfalse"],[
AC_MSG_ERROR([Perl not found; check your \$PATH.])
fi
])
pmdir_relative_path=`\
$PERL -MConfig \
......@@ -34,12 +34,12 @@ AC_ARG_WITH(
[--with-pmdir=DIR],
[install Perl modules in DIR]),
[PMDIR=${withval}],
[PMDIR='${prefix}'/"$pmdir_relative_path"])
[PMDIR="$pmdir_relative_path"])
AC_SUBST([PMDIR])
# Checks for libraries.
PKG_CHECK_MODULES(ZLIB, zlib)
PKG_CHECK_MODULES([ZLIB], [zlib])
# Checks for header files.
AC_CHECK_HEADERS([arpa/inet.h fcntl.h limits.h netdb.h stdint.h stdlib.h string.h sys/socket.h unistd.h])
......@@ -73,9 +73,9 @@ AC_ARG_ENABLE([pca],
[pca=${enableval}],
[pca=no])
if test "x${pca}" = "xyes" ; then
AC_CHECK_LIB(lapack, dgeev_)
fi
AS_IF([test "x$pca" = "xyes"],[
AC_SEARCH_LIBS([dgeev_], [lapack])
])
# Generate output.
AC_CONFIG_FILES([Makefile
......
vcftools (0.1.14+dfsg-5) UNRELEASED; urgency=medium
vcftools (0.1.15-1) unstable; urgency=medium
* New upstream version
[ Steffen Moeller ]
* debian/upstream/metadata:
- added references to registries
- yamllint cleanliness
-- Steffen Moeller <moeller@debian.org> Sun, 10 Sep 2017 12:31:38 +0200
[ Andreas Tille ]
* Standards-Version: 4.1.3
* debhelper 11
* d/rules: do not parse d/changelog
-- Andreas Tille <tille@debian.org> Sun, 18 Feb 2018 20:08:00 +0100
vcftools (0.1.14+dfsg-4) unstable; urgency=medium
......
......@@ -4,11 +4,10 @@ Uploaders: Thorsten Alteholz <debian@alteholz.de>,
Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
dh-autoreconf,
Build-Depends: debhelper (>= 11),
pkg-config,
zlib1g-dev
Standards-Version: 3.9.8
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/vcftools.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/vcftools.git
Homepage: https://vcftools.github.io/
......
......@@ -6,10 +6,10 @@ Description: Fix manpage syntax
+++ b/src/cpp/vcftools.1
@@ -1,7 +1,7 @@
.\" Manpage for vcftools.
-.TH vcftools man page 1 "27 August 2014" "0.1.13" "vcftools man page"
+.TH VCFTOOLS "1" "27 August 2014" "0.1.13" "vcftools man page"
-.TH vcftools man page 1 "05 January 2016" "0.1.14" "vcftools man page"
+.TH VCFTOOLS "1" "05 January 2017" "0.1.15" "vcftools man page"
.SH NAME
-vcftools v0.1.13 \- Utilities for the variant call format (VCF) and binary variant call format (BCF)
-vcftools v0.1.14 \- Utilities for the variant call format (VCF) and binary variant call format (BCF)
+vcftools \- Utilities for the variant call format (VCF) and binary variant call format (BCF)
.SH SYNOPSIS
.B vcftools
......
......@@ -6,14 +6,15 @@ export DH_VERBOSE=1
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
DEBPKGNAME := $(shell dpkg-parsechangelog | awk '/^Source:/ {print $$2}')
sampledir:=$(CURDIR)/debian/$(DEBPKGNAME)/usr/share/doc/$(DEBPKGNAME)/examples
include /usr/share/dpkg/default.mk
sampledir:=$(CURDIR)/debian/$(DEB_SOURCE)/usr/share/doc/$(DEB_SOURCE)/examples
%:
dh $@ --with autoreconf
dh $@
override_dh_auto_configure:
dh_auto_configure -- --with-pmdir=/usr/share/perl5
dh_auto_configure -- --with-pmdir=/share/perl5
override_dh_installchangelogs:
dh_installchangelogs src/perl/ChangeLog
......
bin_PROGRAMS = vcftools
vcftools_CPPFLAGS = $(ZLIB_CPPFLAGS)
vcftools_CPPFLAGS = $(ZLIB_CFLAGS)
vcftools_LDADD = $(ZLIB_LIBS)
vcftools_SOURCES = \
......
......@@ -86,6 +86,7 @@ parameters::parameters(int argc, char *argv[])
output_as_IMPUTE = false;
output_as_ldhat_phased = false;
output_as_ldhat_unphased = false;
output_as_ldhelmet = false;
output_BEAGLE_genotype_likelihoods_GL = false;
output_BEAGLE_genotype_likelihoods_PL = false;
output_counts = false;
......@@ -144,7 +145,9 @@ parameters::parameters(int argc, char *argv[])
stream_in = false;
stream_out = false;
suppress_allele_output = false;
temp_dir = "/tmp/";
const char* raw = getenv("TMPDIR"); // Get environment variable
temp_dir = raw?raw:""; // Handle case where TMPDIR is NULL.
if(temp_dir.empty()) temp_dir = "/tmp/";
vcf_filename="";
vcf_format = false;
vcf_compressed = false;
......@@ -269,6 +272,7 @@ void parameters::read_parameters()
else if (in_str == "--ld-window-min") { ld_snp_window_min = atoi(get_arg(i+1).c_str()); i++; } // Max SNP distance for LD output
else if (in_str == "--ldhat-geno") { output_as_ldhat_unphased = true; num_outputs++;}
else if (in_str == "--ldhat") { output_as_ldhat_phased = true; phased_only = true; num_outputs++;} // Output as LDhat format
else if (in_str == "--ldhelmet") { output_as_ldhelmet = true; phased_only = true; remove_indels = true; num_outputs++; } // Output as LDhelmet format
else if (in_str == "--LROH") {output_LROH = true; num_outputs++;}
else if (in_str == "--mac") { min_mac = atoi(get_arg(i+1).c_str()); i++; } // Minimum Site MAC
else if (in_str == "--maf") { min_maf = atof(get_arg(i+1).c_str()); i++; } // Minimum Site MAF
......@@ -411,7 +415,7 @@ void parameters::print_params()
if (indv_exclude_files.size() != 0)
{
for (unsigned int ui=0; ui<indv_exclude_files.size(); ui++)
LOG.printLOG("\t--exclude " + indv_exclude_files[ui] + "\n");
LOG.printLOG("\t--remove " + indv_exclude_files[ui] + "\n");
}
if (indv_keep_files.size() != 0)
{
......@@ -520,6 +524,7 @@ void parameters::print_params()
if (temp_dir != defaults.temp_dir) LOG.printLOG("\t--temp " + temp_dir + "\n");
if (output_Tajima_D_bin_size != defaults.output_Tajima_D_bin_size) LOG.printLOG("\t--TajimaD " + output_log::int2str(output_Tajima_D_bin_size) + "\n");
if (output_as_ldhat_phased) LOG.printLOG("\t--ldhat\n");
if (output_as_ldhelmet) LOG.printLOG("\t--ldhelmet\n");
if (output_as_ldhat_unphased) LOG.printLOG("\t--ldhat-geno\n");
if (site_filter_flags_to_exclude.size() > 0)
......@@ -709,7 +714,7 @@ void parameters::check_parameters()
if (max_alleles < min_alleles) error("Max Number of Alleles must be greater than Min Number of Alleles.", 6);
if (max_mean_depth < min_mean_depth) error("Max Mean Depth must be greater the Min Mean Depth.", 7);
if (max_genotype_depth < min_genotype_depth) error("Max Genotype Depth must be greater than Min Genotype Depth.", 9);
if (((output_as_ldhat_phased == true) || (output_as_ldhat_unphased)) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting LDhat format.", 11);
if (((output_as_ldhat_phased == true) || (output_as_ldhat_unphased) || (output_as_ldhelmet)) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting LDhat format.", 11);
if ((output_BEAGLE_genotype_likelihoods_GL == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11);
if ((output_BEAGLE_genotype_likelihoods_PL == true) && (chrs_to_keep.size() != 1)) error("Require a chromosome (--chr) when outputting Beagle likelihoods.", 11);
if (min_kept_mask_value > 9) error("Min Mask value must be between 0 and 9.", 14);
......
......@@ -113,6 +113,7 @@ public:
bool output_as_IMPUTE;
bool output_as_ldhat_phased;
bool output_as_ldhat_unphased;
bool output_as_ldhelmet;
bool output_BEAGLE_genotype_likelihoods_GL;
bool output_BEAGLE_genotype_likelihoods_PL;
bool output_counts;
......
......@@ -130,6 +130,7 @@ public:
void output_as_IMPUTE(const parameters &params);
void output_as_LDhat_phased(const parameters &params);
void output_as_LDhat_unphased(const parameters &params);
void output_as_LDhelmet(const parameters &params);
void output_FORMAT_information(const parameters &params);
void output_weir_and_cockerham_fst(const parameters &params);
......
......@@ -979,6 +979,183 @@ void variant_file::output_as_LDhat_unphased(const parameters &params)
sites.close();
}
// Output LDhelmet format
void variant_file::output_as_LDhelmet(const parameters &params)
{
if (meta_data.has_genotypes == false)
LOG.error("Require Genotypes in VCF file in order to output LDhelmet format.");
LOG.printLOG("Outputting in LDhelmet format\n");
unsigned int n_snps = 0;
int max_pos = -1;
int ret = -1;
string new_tmp = params.temp_dir+"/vcftools.XXXXXX";
char tmpname[new_tmp.size()];
strcpy(tmpname, new_tmp.c_str());
ret = mkstemp(tmpname);
string pos_tmp_filename(tmpname);
if (ret == -1)
LOG.error(" Could not open temporary file.\n", 12);
::close(ret);
ofstream pos_tmp_file(tmpname, std::ios::out | std::ios::binary);
string snps_file = params.output_prefix + ".ldhelmet.snps";
string pos_file = params.output_prefix + ".ldhelmet.pos";
ofstream snps(snps_file.c_str());
if (!snps.is_open())
LOG.error("Could not open LDhelmet snps Output File: " + snps_file, 2);
ofstream pos(pos_file.c_str());
if (!pos.is_open())
LOG.error("Could not open LDhelmet pos Output File: " + pos_file, 2);
unsigned int n_indv = N_kept_individuals();
pair<int, int> genotypes;
vector<string> alleles;
vector<ofstream *> tmp_files(2*meta_data.N_indv);
vector<string> tmp_filenames(2*meta_data.N_indv);
for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
{
if (include_indv[ui] == false)
continue;
char tmpname[new_tmp.size()];
strcpy(tmpname, new_tmp.c_str());
ret = mkstemp(tmpname);
ofstream *tmp_file = new ofstream(tmpname);
if (ret == -1)
{ // Clean up temp files.
tmp_file->close(); remove(tmpname);
pos_tmp_file.close(); remove(pos_tmp_filename.c_str());
for (unsigned int uj=0; uj<ui; uj++)
{
(tmp_files[2*uj])->close(); remove(tmp_filenames[2*uj].c_str());
(tmp_files[2*uj+1])->close(); remove(tmp_filenames[2*uj+1].c_str());
}
LOG.error("\n\nCould not open temporary file.\n\n"
"Most likely this is because the system is not allowing me to open enough temporary files.\n"
"Try using ulimit -n <int> to increase the number of allowed open files.\n", 12);
}
::close(ret);
tmp_files[2*ui] = tmp_file;
tmp_filenames[2*ui] = tmpname;
char tmpname2[new_tmp.size()];
strcpy(tmpname2, new_tmp.c_str());
ret = mkstemp(tmpname2);
ofstream *tmp_file2 = new ofstream(tmpname2);
if (ret == -1)
{ // Clean up temp files.
tmp_file2->close(); remove(tmpname2);
pos_tmp_file.close(); remove(pos_tmp_filename.c_str());
for (unsigned int uj=0; uj<ui; uj++)
{
(tmp_files[2*uj])->close(); remove(tmp_filenames[2*uj].c_str());
(tmp_files[2*uj+1])->close(); remove(tmp_filenames[2*uj+1].c_str());
}
(tmp_files[2*ui])->close(); remove(tmp_filenames[2*ui].c_str());
LOG.error("\n\nCould not open temporary file.\n\n"
"Most likely this is because the system is not allowing me to open enough temporary files.\n"
"Try using ulimit -n <int> to increase the number of allowed open files.\n", 12);
}
::close(ret);
tmp_files[2*ui+1] = tmp_file2;
tmp_filenames[2*ui+1] = tmpname2;
}
vector<char> variant_line;
entry *e = get_entry_object();
ofstream *tmp_file;
int POS;
while(!eof())
{
get_entry(variant_line);
e->reset(variant_line);
N_entries += e->apply_filters(params);
if(!e->passed_filters)
continue;
N_kept_entries++;
e->parse_basic_entry(true);
POS = e->get_POS();
max_pos = max(POS, max_pos);
pos_tmp_file << POS << endl;
for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
{
if (include_indv[ui] == false)
continue;
e->parse_genotype_entry(ui, true);
e->get_indv_GENOTYPE_ids(ui, genotypes);
e->get_alleles_vector(alleles);
for (unsigned int k=0; k<2; k++)
{
tmp_file = tmp_files[(2*ui)+k];
int geno;
if (k == 0)
geno = genotypes.first;
else
geno = genotypes.second;
if ((geno >= 0) && (e->include_genotype[ui]==true))
(*tmp_file) << alleles[geno];
else
(*tmp_file) << "N";
}
}
n_snps++;
}
pos.setf(ios::fixed,ios::floatfield);
pos.precision(4);
ifstream pos_read_file(pos_tmp_filename.c_str());
string tmp_line;
for (unsigned int ui=0; ui<n_snps; ui++)
{
getline(pos_read_file,tmp_line);
POS = header::str2int(tmp_line);
pos << POS << endl;
}
pos.close();
for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
{
if (include_indv[ui] == false)
continue;
for (unsigned int k=0; k<2; k++)
{
ofstream *tmp_file = tmp_files[2*ui+k];
(*tmp_file) << endl;
tmp_file->close();
delete tmp_file;
ifstream read_file(tmp_filenames[2*ui+k].c_str());
if (!read_file.good())
LOG.error("\n\nCould not open temporary file.\n\n"
"Most likely this is because the system is not allowing me to open enough temporary files.\n"
"Try using ulimit -n <int> to increase the number of allowed open files.\n", 12);
getline(read_file, tmp_line);
snps << ">" << meta_data.indv[ui] << "-" << k << endl;
snps << tmp_line << endl;
read_file.close();
remove(tmp_filenames[2*ui+k].c_str());
}
}
delete e;
remove(pos_tmp_filename.c_str());
snps.close();
}
// Output INFO fields in tab-delimited format
void variant_file::output_INFO_for_each_site(const parameters &params)
{
......
......@@ -1567,7 +1567,7 @@ void variant_file::output_haplotype_r2(const parameters &params)
if ((r2 < min_r2) | (r2 != r2))
continue;
out << CHROM << "\t" << POS << "\t" << POS2 << "\t" << chr_count << "\t" << r2 << "\t" << D << "\t" << Dprime << "\t" << endl;
out << CHROM << "\t" << POS << "\t" << POS2 << "\t" << chr_count << "\t" << r2 << "\t" << D << "\t" << Dprime << endl;
}
}
tmp_file.close();
......
.\" Manpage for vcftools.
.TH vcftools man page 1 "27 August 2014" "0.1.13" "vcftools man page"
.TH vcftools man page 1 "05 January 2016" "0.1.14" "vcftools man page"
.SH NAME
vcftools v0.1.13 \- Utilities for the variant call format (VCF) and binary variant call format (BCF)
vcftools v0.1.14 \- Utilities for the variant call format (VCF) and binary variant call format (BCF)
.SH SYNOPSIS
.B vcftools
[
......@@ -693,9 +693,11 @@ This option outputs phased haplotypes in IMPUTE reference-panel format. As IMPUT
.PP
.B --ldhat
.br
.B --ldhelmet
.br
.B --ldhat-geno
.RS 2
These options output data in LDhat format. This option requires the "--chr" filter option to also be used. The first option outputs phased data only, and therefore also implies "--phased" be used, leading to unphased individuals and genotypes being excluded. The second option treats all of the data as unphased, and therefore outputs LDhat files in genotype/unphased format. Two output files are generated with the suffixes ".ldhat.sites" and ".ldhat.locs", which correspond to the LDhat "sites" and "locs" input files respectively.
These options output data in LDhat/LDhelmet format. This option requires the "--chr" filter option to also be used. The two first options output phased data only, and therefore also implies "--phased" be used, leading to unphased individuals and genotypes being excluded. For LDhelmet, only snps will be considered, and therefore it implies "--remove-indels". The second option treats all of the data as unphased, and therefore outputs LDhat files in genotype/unphased format. Two output files are generated with the suffixes ".ldhat.sites" and ".ldhat.locs", which correspond to the LDhat "sites" and "locs" input files respectively; for LDhelmet, the two files generated have the suffixes ".ldhelmet.snps" and ".ldhelmet.pos", which corresponds to the "SNPs" and "positions" files.
.RE
.PP
.B --BEAGLE-GL
......
......@@ -89,6 +89,7 @@ int main(int argc, char *argv[])
if (params.output_BEAGLE_genotype_likelihoods_PL == true) vf->output_BEAGLE_genotype_likelihoods(params, 1);
if (params.output_as_ldhat_unphased == true) vf->output_as_LDhat_unphased(params);
if (params.output_as_ldhat_phased == true) vf->output_as_LDhat_phased(params);
if (params.output_as_ldhelmet == true) vf->output_as_LDhelmet(params);
if (params.output_singletons == true) vf->output_singletons(params);
if (params.output_site_pi == true) vf->output_per_site_nucleotide_diversity(params);
if (params.pi_window_size > 0) vf->output_windowed_nucleotide_diversity(params);
......
......@@ -121,10 +121,10 @@ sub read_chunk
return;
}
my $to = $pos + $$self{size};
my $cmd = "samtools faidx $$self{file} $chr:$pos-$to";
my $cmd = "samtools faidx \Q$$self{file}\E \Q$chr:$pos-$to\E";
my @out = $self->cmd($cmd) or $self->throw("$cmd: $!");
my $line = shift(@out);
if ( !($line=~/^>$chr:(\d+)-(\d+)/) ) { $self->throw("Could not parse: $line"); }
if ( !($line=~/^>\Q$chr\E:(\d+)-(\d+)/) ) { $self->throw("Could not parse: $line"); }
$$self{from} = $1;
my $chunk = '';
while ($line=shift(@out))
......
......@@ -24,7 +24,7 @@ dist_bin_SCRIPTS = \
vcf-tstv \
vcf-validator
pmdir = $(PMDIR)
pmdir = $(exec_prefix)/$(PMDIR)
dist_pm_DATA = \
FaSlice.pm \
......
package Vcf;
our $VERSION = 'r953';
our $VERSION = 'v0.1.14-12-gcdb80b8';
# http://vcftools.sourceforge.net/specs.html
# http://samtools.github.io/hts-specs/
......@@ -410,6 +410,7 @@ sub _set_version
elsif ( $$self{version} eq '4.0' ) { $reader=Vcf4_0->new(%$self); }
elsif ( $$self{version} eq '4.1' ) { $reader=Vcf4_1->new(%$self); }
elsif ( $$self{version} eq '4.2' ) { $reader=Vcf4_2->new(%$self); }
elsif ( $$self{version} eq '4.3' ) { $reader=Vcf4_3->new(%$self); }
else
{
$self->warn(qq[The version "$$self{version}" not supported, assuming VCFv$$self{default_version}\n]);
......@@ -1612,6 +1613,7 @@ sub event_type
elsif ( $allele=~/^[ACGT]$/ ) { $len=1; $type='s'; $ht=$allele; }
elsif ( $allele=~/^I/ ) { $len=length($allele)-1; $type='i'; $ht=$'; }
elsif ( $allele=~/^D(\d+)/ ) { $len=-$1; $type='i'; $ht=''; }
elsif ( length($allele)==length($ref) && $allele=~/^[ACGTN]+$/ && $ref=~/^[ACGTN]+$/ ) { $len = length($allele); $type='s'; $ht=$allele; }
else
{
my $chr = ref($rec) eq 'HASH' ? $$rec{CHROM} : 'undef';
......@@ -2933,6 +2935,33 @@ sub Vcf4_0::format_header_line
}
$value = '<' .join(',',@items). '>';
}
elsif ( $tmp_rec{key} =~ /vcfProcessLog(_.+)*/)
{
my %has = ( key=>1, handler=>1, default=>1 );
my @items;
my @knownKeys = qw(InputVCF InputVCFSource InputVCFVer InputVCFParam);
for my $key (qw(InputVCF InputVCFSource InputVCFVer InputVCFParam), sort keys %tmp_rec)
{
if ( !exists($tmp_rec{$key}) || $has{$key} || !grep(/^$key$/,@knownKeys)) { next; }
my $value;
if($key eq "InputVCFParam"){
$value=undef;
foreach my $ky(keys %{$tmp_rec{$key}}){
if(defined($value)){
$value .= ",";
}
$value .= $ky."=".$tmp_rec{$key}->{$ky};
}
#$value="<".$tmp_rec{$key}.">";
}else{
$value = $tmp_rec{$key};
}
push @items, "$key=<$value>";
$has{$key}=1;
}
$value = '<' .join(',',@items). '>';
}
else { $value = $tmp_rec{value}; }
my $line = "##$tmp_rec{key}=".$value."\n";
......@@ -2981,7 +3010,15 @@ sub Vcf4_0::parse_header_line
else { $self->throw(qq[Could not parse header line: $line\nStopped at [$tmp].\n]); }
}
if ( $tmp=~/^[^,\\"]+/ ) { $attr_value .= $&; $tmp = $'; }
if( $tmp!~/>,/ && $tmp=~/^(<)(.+,{1}.+)(>)$/) {$tmp=$'; %$attr_value = split(/[,=]/,$2);}
if( $tmp=~ m/^<"([^,">]+)">/) {$tmp=$'; $attr_value = $1;}
if ( $tmp=~/^[^,\\"]+/) { $attr_value .= $&; $tmp = $';
if($attr_value =~ m/^<.+>$/){
$attr_value =~ s/^<//; $attr_value =~ s/>$//;
}
}
if ( $tmp=~/^\\\\/ ) { $attr_value .= '\\\\'; $tmp = $'; next; }
if ( $tmp=~/^\\"/ ) { $attr_value .= '\\"'; $tmp = $'; next; }
if ( $tmp eq '' or ($tmp=~/^,/ && !$quoted) or $tmp=~/^"/ )
......@@ -2994,6 +3031,10 @@ sub Vcf4_0::parse_header_line
$attr_value =~ s/^\s+//;
$attr_value =~ s/\s+$//;
}
if($tmp =~ m/^(,([^=<].)+>),/){
$quoted = 1;
$attr_value.= $1;
}
$$rec{$attr_key} = $attr_value;
$tmp = $';
if ( $quoted && $tmp=~/^,/ ) { $tmp = $'; }
......@@ -3004,6 +3045,7 @@ sub Vcf4_0::parse_header_line
$self->throw(qq[Could not parse header line: $line\nStopped at [$tmp].\n]);
}
if( $key =~ m/vcfProcessLog/) {return $rec;}
if ( $key eq 'INFO' or $key eq 'FILTER' or $key eq 'FORMAT' )
{
if ( $key ne 'PEDIGREE' && !exists($$rec{ID}) ) { $self->throw("Missing the ID tag in $line\n"); }
......@@ -3324,6 +3366,7 @@ sub new
reserved =>
{
FILTER => { 0=>1 },
cols => {CHROM=>1,POS=>1,ID=>1,REF=>1,ALT=>1,QUAL=>1,FILTER=>1,INFO=>1,FORMAT=>1},
},
handlers =>
......@@ -3518,5 +3561,79 @@ sub new
return $self;
}
sub Vcf4_2::validate_alt_field
{
my ($self,$values,$ref) = @_;
if ( @$values == 1 && $$values[0] eq '.' ) { return undef; }
my $ret = $self->_validate_alt_field($values,$ref);
if ( $ret ) { return $ret; }
my $ref_len = length($ref);
my $ref1 = substr($ref,0,1);
my @err;
my $msg = '';
for my $item (@$values)
{
if ( $item=~/^(.*)\[(.+)\[(.*)$/ or $item=~/^(.*)\](.+)\](.*)$/ )
{
if ( $1 ne '' && $3 ne '' ) { $msg=', two replacement strings given (expected one)'; push @err,$item; next; }
my $rpl;
if ( $1 ne '' )
{
$rpl = $1;
if ( $rpl ne '.' )
{
my $rref = substr($rpl,0,1);
if ( $rref ne $ref1 ) { $msg=', the first base of the replacement string does not match the reference'; push @err,$item; next; }
}
}
else
{
$rpl = $3;
if ( $rpl ne '.' )
{
my $rref = substr($rpl,-1,1);
if ( $rref ne $ref1 ) { $msg=', the last base of the replacement string does not match the reference'; push @err,$item; next; }
}
}
my $pos = $2;
if ( !($rpl=~/^[ACTGNacgtn]+$/) && $rpl ne '.' ) { $msg=', replacement string not valid (expected [ACTGNacgtn]+)'; push @err,$item; next; }
if ( !($pos=~/^\S+:\d+$/) ) { $msg=', cannot parse sequence:position'; push @err,$item; next; }
next;
}
if ( $item=~/^\.[ACTGNactgn]*([ACTGNactgn])$/ ) { next; }
elsif ( $item=~/^([ACTGNactgn])[ACTGNactgn]*\.$/ ) { next; }
elsif ( $item eq '*' ) { next; }
if ( !($item=~/^[ACTGNactgn]+$|^<[^<>\s]+>$/) ) { push @err,$item; next; }
}
if ( !@err ) { return undef; }
return 'Could not parse the allele(s) [' .join(',',@err). ']' . $msg;
}
#------------------------------------------------
# Version 4.3 specific functions
=head1 VCFv4.3
VCFv4.2 specific functions
=cut
package Vcf4_3;
use base qw(Vcf4_2);
sub new
{
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
bless $self, ref($class) || $class;
$$self{version} = '4.3';
return $self;
}
1;
File mode changed from 100644 to 100755