Skip to content
Commits on Source (2)
/*
* Copyright (C) 2014-2016 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package blbutil;
/**
* <p>Class {@code ByteArray} represents an immutable array of integer
* values between -128 and 127 that is stored as a {@code byte[]} array.
* </p>
* Instances of {@code ByteArray} are immutable.
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public final class ByteArray implements IntArray {
private final byte[] ba;
/**
* Constructs a new {@code ByteArray} instance from
* the specified data.
* @param ba an array of integer values between -128 and 127 inclusive
* @throws NullPointerException if {@code ba == null}
*/
public ByteArray(byte[] ba) {
this.ba = ba.clone();
}
/**
* Constructs a new {@code ByteArray} instance from the specified data.
* @param ia an array of integer values between -128 and 127 inclusive
* @throws IllegalArgumentException if
* {@code (ia[j] < -128 || ia[j] > 127)} for any index {@code j}
* satisfying {@code (j >= 0 && j < ia.length)}
* @throws NullPointerException if {@code ia == null}
*/
public ByteArray(int[] ia) {
this(ia, 0, ia.length);
}
/**
* Constructs a new {@code ByteArray} instance from the specified data.
* @param il an list of integer values between -128 and 127 inclusive
* @throws IllegalArgumentException if
* {@code (il.get(j) < -128 || il.get(j) > 127)} for any index {@code j}
* satisfying {@code (j >= 0 && j < il.size())}
* @throws NullPointerException if {@code il == null}
*/
public ByteArray(IntList il) {
this(il, 0, il.size());
}
/**
* Constructs a new {@code ByteArray} instance from the
* specified data.
* @param ia an array of integer values between -128 and 127 inclusive
* @param from the first element to be included (inclusive)
* @param to the last element to be included (exclusive)
* @throws IllegalArgumentException if
* {@code (ia[j] < -128 || ia[j] > 127)} for any index {@code j}
* satisfying {@code (j >= from && j < to)}
* @throws IndexOutOfBoundsException if {@code from < 0 || to > ia.length}
* @throws NegativeArraySizeException if {@code to > from}
* @throws NullPointerException if {@code ia == null}
*/
public ByteArray(int[] ia, int from, int to) {
this.ba = new byte[to - from];
for (int j=from; j<to; ++j) {
if (ia[j] < -128 || ia[j] > 127) {
throw new IllegalArgumentException(String.valueOf(ia[j]));
}
ba[j - from] = (byte) ia[j];
}
}
/**
* Constructs a new {@code ByteArray} instance from the
* specified data.
* @param il an list of integer values between -128 and 127 inclusive
* @param from the first element to be included (inclusive)
* @param to the last element to be included (exclusive)
* @throws IllegalArgumentException if
* {@code (il.get(j) < -128 || il.get(j) > 127)} for any index {@code j}
* satisfying {@code (j >= from && j < to)}
* @throws IndexOutOfBoundsException if {@code from < 0 || to > il.length}
* @throws NegativeArraySizeException if {@code to > from}
* @throws NullPointerException if {@code il == null}
*/
public ByteArray(IntList il, int from, int to) {
this.ba = new byte[to - from];
for (int j=from; j<to; ++j) {
int value = il.get(j);
if (value < -128 || value > 127) {
throw new IllegalArgumentException(String.valueOf(value));
}
ba[j - from] = (byte) value;
}
}
@Override
public int size() {
return ba.length;
}
@Override
public int get(int index) {
return ba[index];
}
@Override
public String toString() {
return this.asString();
}
}
......@@ -99,6 +99,20 @@ public class Utilities {
return sdf.format(now);
}
/**
* Returns the current minutes and seconds as a string. The
* exact details of the string representation
* are unspecified and subject to change.
*
* @return the current local time as a string.
*/
public static String minutesAndSeconds() {
Date now = new Date();
SimpleDateFormat sdf =
new SimpleDateFormat("mm:ss");
return sdf.format(now);
}
/**
* <p>Returns a set of identifiers found in a text file that has
* one identifier per line. The empty set is returned if
......
......@@ -21,8 +21,8 @@ package bref;
import beagleutil.ChromIds;
import beagleutil.Samples;
import blbutil.FileUtil;
import blbutil.IntArray;
import blbutil.IntList;
import ints.IntArray;
import ints.IntList;
import blbutil.Utilities;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
......@@ -54,6 +54,31 @@ import vcf.RefGTRec;
*/
public class AsIsBref3Writer implements BrefWriter {
/**
* The end of file code for a bref file.
*/
public static final int END_OF_DATA = 0;
/**
* The integer denoting denoting the end of the index in a bref file
*/
public static final long END_OF_INDEX = -999_999_999_999_999L;
/**
* The initial integer in a bref version 3 file.
*/
public static final int MAGIC_NUMBER_V3 = 2055763188;
/**
* The byte value denoting a sequence coded record
*/
public static final byte SEQ_CODED = 0;
/**
* The byte value denoting an allele coded record
*/
public static final byte ALLELE_CODED = 1;
private final String WRITE_ERR = "Error writing file";
private final String CONTIGUITY_ERR = "Error: chromosomes not contiguous";
......@@ -62,7 +87,6 @@ public class AsIsBref3Writer implements BrefWriter {
private final Comparator<String[]> ALLELES_COMP = allelesComparator();
public final int MAX_SAMPLES = (1 << 29) - 1;
public final int MAX_BLOCK_BYTES = Integer.MAX_VALUE;
private int lastChromIndex = -1;
private IntArray hap2Seq = null;
......@@ -145,10 +169,10 @@ public class AsIsBref3Writer implements BrefWriter {
}
if (rec.isAlleleCoded()==false) {
if (hap2Seq==null) {
hap2Seq = rec.hapToSeq();
hap2Seq = rec.map(0);
}
else if (rec.hapToSeq()!=hap2Seq) {
hap2Seq = rec.hapToSeq();
else if (rec.map(0)!=hap2Seq) {
hap2Seq = rec.map(0);
startNewBlock = true;
}
}
......@@ -214,10 +238,12 @@ public class AsIsBref3Writer implements BrefWriter {
}
}
else {
brefOut.writeChar(rec.seqToAllele().size());
IntArray hap2seq = rec.hapToSeq();
for (int j=0, n=hap2seq.size(); j<n; ++j) {
brefOut.writeChar(hap2seq.get(j));
assert rec.nMaps()==2;
IntArray hapToSeq = rec.map(0);
IntArray seqToAllele = rec.map(1);
brefOut.writeChar(seqToAllele.size());
for (int j=0, n=hapToSeq.size(); j<n; ++j) {
brefOut.writeChar(hapToSeq.get(j));
}
}
bytesWritten += (nHaps + 1)*Character.BYTES;
......@@ -227,7 +253,8 @@ public class AsIsBref3Writer implements BrefWriter {
if (rec.nAlleles() >= 256) {
Utilities.exit("ERROR: more than 256 alleles: " + rec.marker());
}
IntArray seq2Allele = rec.seqToAllele();
assert rec.nMaps()==2;
IntArray seq2Allele = rec.map(1);
writeMarker(rec.marker());
brefOut.writeByte(SEQ_CODED);
for (int j=0, n=seq2Allele.size(); j<n; ++j) {
......@@ -367,7 +394,7 @@ public class AsIsBref3Writer implements BrefWriter {
}
private DataOutputStream dataOutputStream(File file) {
OutputStream dos = null;
OutputStream dos;
if (file==null) {
dos = new DataOutputStream(System.out);
}
......
......@@ -55,7 +55,7 @@ public final class Bref3It implements SampleFileIt<RefGTRec> {
/**
* Constructs a new {@code Bref3It} instance.
* @param brefFile a bref version 3 file or {@null} if the
* @param brefFile a bref version 3 file or {@code null} if the
* bref version 3 file is to be read from standard input
*
* @throws IllegalArgumentException if a format error is detected in a
......
......@@ -20,11 +20,11 @@ package bref;
import beagleutil.ChromIds;
import beagleutil.Samples;
import blbutil.CharArray;
import ints.CharArray;
import blbutil.Const;
import blbutil.Filter;
import blbutil.IntArray;
import blbutil.UnsignedByteArray;
import ints.IntArray;
import ints.UnsignedByteArray;
import blbutil.Utilities;
import java.io.DataInput;
import java.io.IOException;
......@@ -185,7 +185,7 @@ public final class Bref3Reader {
private static void readAndCheckMagicNumber(DataInput di) throws IOException {
int magicNumber=di.readInt();
if (magicNumber!=BrefWriter.MAGIC_NUMBER_V3) {
if (magicNumber!=AsIsBref3Writer.MAGIC_NUMBER_V3) {
String s = "ERROR: Unrecognized input file. Was input file created "
+ Const.nl + "with a different version of the bref program?";
Utilities.exit(s);
......
......@@ -36,37 +36,6 @@ import vcf.RefGTRec;
*/
public interface BrefWriter extends Closeable {
/**
* The end of file code for a bref file.
*/
final int END_OF_DATA = 0;
/**
* The integer denoting denoting the end of the index in a bref file
*/
final long END_OF_INDEX = -999_999_999_999_999L;
/**
* The initial integer in a bref version 2 file.
*/
final int MAGIC_NUMBER_V2 = 223579146;
/**
* The initial integer in a bref version 3 file.
*/
final int MAGIC_NUMBER_V3 = 2055763188;
/**
* The byte value denoting a sequence coded record
*/
final byte SEQ_CODED = 0;
/**
* The byte value denoting an allele coded record
*/
final byte ALLELE_CODED = 1;
/**
* Returns the list of samples.
* @return the list of samples
......
......@@ -19,8 +19,8 @@
package bref;
import beagleutil.Samples;
import blbutil.IntArray;
import blbutil.IntList;
import ints.IntArray;
import ints.IntList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
......@@ -281,7 +281,7 @@ public class SeqCoder3 {
}
List<RefGTRec> list = new ArrayList<>(recs.size());
int[] seq2Hap = seq2Hap(); // can this be created when adding records?
IntArray hap2seq = IntArray.create(hap2Seq, 0, (seq2Hap.length));
IntArray hap2seq = IntArray.create(hap2Seq, seq2Hap.length);
for (int j=0, n=recs.size(); j<n; ++j) {
RefGTRec rec = recs.get(j);
Marker m = rec.marker();
......@@ -307,9 +307,9 @@ public class SeqCoder3 {
private IntArray seq2Allele(RefGTRec rec, int[] seq2Hap) {
int[] seq2Allele = new int[seq2Hap.length];
for (int j=0; j<seq2Allele.length; ++j) {
seq2Allele[j] = rec.allele(seq2Hap[j]);
seq2Allele[j] = rec.get(seq2Hap[j]);
}
return IntArray.create(seq2Allele, 0, rec.nAlleles());
return IntArray.create(seq2Allele, rec.nAlleles());
}
/**
......
......@@ -24,9 +24,7 @@ import blbutil.SampleFileIt;
import blbutil.Utilities;
import java.io.File;
import java.io.PrintWriter;
import vcf.GTRec;
import vcf.RefGTRec;
import vcf.SeqCodedRefGT;
import vcf.VcfWriter;
/**
......
......@@ -18,14 +18,12 @@
*/
package haplotype;
import blbutil.Const;
import java.util.BitSet;
import vcf.Marker;
import vcf.Markers;
/**
* <p>Class {@code BitHapPair} represents a pair of haplotypes for a sample.
* The class stores alleles using {@code java.util.BitSet} objects.
* The class stores alleles as a bit array.
* </p>
* Instances of class {@code BitHapPair} are immutable.
*
......@@ -33,10 +31,12 @@ import vcf.Markers;
*/
public final class BitHapPair implements HapPair {
private static final int LOG2_BITS_PER_WORD = 6;
private final Markers markers;
private final int idIndex;
private final BitSet alleles1;
private final BitSet alleles2;
private final long[] bits1;
private final long[] bits2;
/**
* Constructs a new {@code BitHapPair} instance.
......@@ -69,13 +69,14 @@ public final class BitHapPair implements HapPair {
}
this.markers = markers;
this.idIndex = idIndex;
this.alleles1 = toBitSet(markers, alleles1);
this.alleles2 = toBitSet(markers, alleles2);
this.bits1 = toBitArray(markers, alleles1);
this.bits2 = toBitArray(markers, alleles2);
}
private static BitSet toBitSet(Markers markers, int[] alleles) {
int index = 0;
BitSet bs = new BitSet(markers.sumHaplotypeBits());
private static long[] toBitArray(Markers markers, int[] alleles) {
int nWords = (markers.sumHaplotypeBits() + (Long.SIZE-1)) >> LOG2_BITS_PER_WORD;
long[] bits = new long[nWords];
int bitIndex = 0;
for (int k=0; k<alleles.length; ++k) {
int allele = alleles[k];
if (allele < 0 || allele >= markers.marker(k).nAlleles()) {
......@@ -86,35 +87,40 @@ public final class BitHapPair implements HapPair {
int mask = 1;
int nBits = markers.sumHaplotypeBits(k+1) - markers.sumHaplotypeBits(k);
for (int l=0; l<nBits; ++l) {
boolean b = (allele & mask)==mask;
bs.set(index++, b);
if ((allele & mask)==mask) {
int wordIndex = bitIndex >> LOG2_BITS_PER_WORD;
bits[wordIndex] |= (1L << bitIndex);
}
bitIndex++;
mask <<= 1;
}
}
return bs;
return bits;
}
@Override
public int allele1(int marker) {
return allele(alleles1, marker);
return allele(bits1, marker);
}
@Override
public int allele2(int marker) {
return allele(alleles2, marker);
return allele(bits2, marker);
}
private int allele(BitSet bitset, int marker) {
private int allele(long[] alleleBits, int marker) {
int start = markers.sumHaplotypeBits(marker);
int end = markers.sumHaplotypeBits(marker+1);
if (end==(start+1)) {
return bitset.get(start) ? 1 : 0;
int wordIndex = start >> LOG2_BITS_PER_WORD;
return (int) (alleleBits[wordIndex] >> start) & 1;
}
int allele = 0;
int mask = 1;
for (int j=start; j<end; ++j) {
if (bitset.get(j)) {
allele += mask;
int wordIndex = j >> LOG2_BITS_PER_WORD;
if ((alleleBits[wordIndex] & (1L << j)) != 0) {
allele |= mask;
}
mask <<= 1;
}
......@@ -150,12 +156,8 @@ public final class BitHapPair implements HapPair {
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("idIndex=");
sb.append("BitHapPair: idIndex=");
sb.append(idIndex);
sb.append(Const.nl);
sb.append(alleles1);
sb.append(Const.nl);
sb.append(alleles2);
return sb.toString();
}
}
......@@ -21,9 +21,9 @@ package haplotype;
import beagleutil.Samples;
import java.util.Collections;
import java.util.List;
import vcf.GT;
import vcf.Marker;
import vcf.Markers;
import vcf.PhasedGT;
/**
* <p>Class {@code HapPairPhasedGT} stores a list of samples and a
......@@ -34,11 +34,11 @@ import vcf.PhasedGT;
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public final class HapPairPhasedGT implements PhasedGT {
public final class HapPairPhasedGT implements GT {
private final Markers markers;
private final Samples samples;
private final HapPair[] hapPairs;
private final BitHapPair[] hapPairs;
/**
* Constructs a new {@code BasicPhasedGT} instance from the specified data.
......@@ -65,7 +65,7 @@ public final class HapPairPhasedGT implements PhasedGT {
* {@code (hapPairList == null || hapPairList(j) == null)}
* for any {@code j} satisfying {@code (0 <= j && j < hapPairList.size())}
*/
public HapPairPhasedGT(Samples samples, List<HapPair> hapPairList) {
public HapPairPhasedGT(Samples samples, List<BitHapPair> hapPairList) {
if (hapPairList.isEmpty()) {
throw new IllegalArgumentException("haps.isEmpy()==true");
}
......@@ -73,10 +73,10 @@ public final class HapPairPhasedGT implements PhasedGT {
checkSamples(samples, hapPairList);
this.markers = checkAndExtractMarkers(hapPairList);
this.samples = samples;
this.hapPairs = hapPairList.toArray(new HapPair[0]);
this.hapPairs = hapPairList.toArray(new BitHapPair[0]);
}
private void checkSamples(Samples samples, List<HapPair> hapPairs) {
private static void checkSamples(Samples samples, List<BitHapPair> hapPairs) {
if (samples.nSamples()!=hapPairs.size()) {
throw new IllegalArgumentException("inconsistent samples");
}
......@@ -101,7 +101,7 @@ public final class HapPairPhasedGT implements PhasedGT {
* {@code hapPairList == null || hapPairList(j) == null}
* for any {@code j} satisfying {@code 0 <= j && j < hapPairList.size()}
*/
static Markers checkAndExtractMarkers(List<HapPair> hapPairList) {
static Markers checkAndExtractMarkers(List<BitHapPair> hapPairList) {
if (hapPairList.isEmpty()) {
return Markers.create(new Marker[0]);
}
......@@ -116,6 +116,30 @@ public final class HapPairPhasedGT implements PhasedGT {
}
}
@Override
public boolean isPhased(int marker, int sample) {
if (marker < 0 || marker >= this.nMarkers()) {
throw new IndexOutOfBoundsException(String.valueOf(marker));
}
if (sample < 0 || sample >= this.nSamples()) {
throw new IndexOutOfBoundsException(String.valueOf(sample));
}
return true;
}
@Override
public boolean isPhased(int sample) {
if (sample < 0 || sample >= this.nSamples()) {
throw new IndexOutOfBoundsException(String.valueOf(sample));
}
return true;
}
@Override
public boolean isPhased() {
return true;
}
@Override
public int allele1(int marker, int hapPair) {
return hapPairs[hapPair].allele1(marker);
......
/*
* Copyright (C) 2014-2016 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package haplotype;
import vcf.Marker;
import vcf.Markers;
/**
* <p>Interface {@code HapPairs} represents a list of haplotype pairs.
* Each haplotype pair is guaranteed to have two non-missing
* alleles at each marker.
* </p>
* All instances of {@code HapPairs} are required to
* be immutable.
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public interface HapPairs {
/**
* Returns the allele for the specified marker and haplotype.
* @param marker a marker index
* @param haplotype a haplotype index
* @return the allele for the specified marker and haplotype
*
* @throws IndexOutOfBoundsException if
* {@code marker < 0 || marker >= this.nMarkers()}
* @throws IndexOutOfBoundsException if
* {@code haplotype < 0 || haplotype >= this.nHaps()}
*/
int allele(int marker, int haplotype);
/**
* Returns the first allele for the specified marker and haplotype pair.
* @param marker a marker index
* @param hapPair a haplotype pair index
* @return the first allele for the specified marker and haplotype pair
*
* @throws IndexOutOfBoundsException if
* {@code marker < 0 || marker >= this.nMarkers()}
* @throws IndexOutOfBoundsException if
* {@code hapPair < 0 || hapPair >= this.nHapPairs()}
*/
int allele1(int marker, int hapPair);
/**
* Returns the second allele for the specified marker and haplotype pair.
* @param marker a marker index
* @param hapPair a haplotype pair index
* @return the second allele for the specified marker and haplotype pair
*
* @throws IndexOutOfBoundsException if
* {@code marker < 0 || marker >= this.nMarkers()}
* @throws IndexOutOfBoundsException if
* {@code hapPair < 0 || hapPair >= this.nHapPairs()}
*/
int allele2(int marker, int hapPair);
/**
* Returns the number of markers.
* @return the number of markers
*/
int nMarkers();
/**
* Returns the markers.
* @return the markers
*/
Markers markers();
/**
* Returns the specified marker.
* @param marker a marker index
* @return the specified marker
* @throws IndexOutOfBoundsException if
* {@code marker < 0 || marker >= this.nMarkers()}
*/
Marker marker(int marker);
/**
* Returns the number of haplotypes. The returned value is equal to
* {@code 2*this.nHapPairs()}.
* @return the number of haplotypes
*/
int nHaps();
/**
* Returns the number of haplotype pairs. The returned value is
* equal to {@code this.nHaps()/2}.
* @return the number of haplotype pairs
*/
int nHapPairs();
/**
* Returns the sample identifier index for the specified haplotype pair.
* @param hapPair a haplotype pair index
* @return the sample identifier index for the specified haplotype pair
* @throws IndexOutOfBoundsException if
* {@code hapPair < 0 || hapPair >= this.nHapPairs()}
*/
public int idIndex(int hapPair);
}
......@@ -18,9 +18,9 @@
*/
package haplotype;
import vcf.GT;
import vcf.Marker;
import vcf.Markers;
import vcf.PhasedGT;
/**
* Class {@code WrappedHapPair} is a {@code HapPair} instance
......@@ -30,7 +30,7 @@ import vcf.PhasedGT;
*/
public final class WrappedHapPair implements HapPair {
private final PhasedGT phasedGT;
private final GT phasedGT;
private final int hapPair;
/**
......@@ -39,11 +39,15 @@ public final class WrappedHapPair implements HapPair {
* @param phasedGT the {@code RefGTWindow} object that
* will be "wrapped" by {@code this}
* @param hapPair a haplotype pair index
* @throws IllegalArgumentException if {@code phasedGT.isPhased() == false}
* @throws IllegalArgumentException if
* {@code hapPair < 0 || hapPair >= sampleHapPairs.nHapPairs()}
* @throws NullPointerException if {@code sampleHapPairs == null}
*/
public WrappedHapPair(PhasedGT phasedGT, int hapPair) {
public WrappedHapPair(GT phasedGT, int hapPair) {
if (phasedGT.isPhased()==false) {
throw new IllegalArgumentException("unphased data");
}
if (hapPair < 0 || hapPair >= phasedGT.nSamples()) {
throw new IllegalArgumentException("hapPair: " + hapPair);
}
......
/*
* Copyright (C) 2014-2016 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package haplotype;
import vcf.Marker;
import vcf.Markers;
import vcf.PhasedGT;
/**
* <p>Class {@code WrappedHapPairs} is a {@code HapPairs} instance
* obtain by wrapping a {@code PhasedGT} object.
* </p>
* <p>Instances of {@code WrappedHapPairs} are immutable.
* </p>
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public class WrappedHapPairs implements HapPairs {
private final PhasedGT phasedGT;
/**
* Constructs a new {@code WrappedHapPairs} instance.
* @param phasedGT the object to be wrapped by {@code this}
* @throws NullPointerException if {@code phasedGT == null}
*/
public WrappedHapPairs(PhasedGT phasedGT) {
if (phasedGT==null) {
throw new NullPointerException(PhasedGT.class.toString());
}
this.phasedGT = phasedGT;
}
@Override
public int allele(int marker, int haplotype) {
return phasedGT.allele(marker, haplotype);
}
@Override
public int allele1(int marker, int hapPair) {
return phasedGT.allele1(marker, hapPair);
}
@Override
public int allele2(int marker, int hapPair) {
return phasedGT.allele2(marker, hapPair);
}
@Override
public int nMarkers() {
return phasedGT.nMarkers();
}
@Override
public Markers markers() {
return phasedGT.markers();
}
@Override
public Marker marker(int marker) {
return phasedGT.marker(marker);
}
@Override
public int nHaps() {
return phasedGT.nHaps();
}
@Override
public int nHapPairs() {
return phasedGT.nSamples();
}
@Override
public int idIndex(int hapPair) {
return phasedGT.samples().idIndex(hapPair);
}
}
/*
* Copyright (C) 2014-2016 Brian L. Browning
*
* This file is part of Beagle
*
* Beagle is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Beagle is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package imp;
import blbutil.IntArray;
/**
* <p>Class {@code HapToSeq} stores an array mapping haplotype indices to
* allele sequences indices.
* </p>
* <p>Instances of class {@code HapToSeq} are immutable.
* </p>
*
* @author Brian L. Browning {@code <browning@uw.edu>}
*/
public class HapToSeq {
private final IntArray hap2Seq;
private final int nSeq;
/**
* Constructs a new {@code HapAlleles} instance from the specified data.
* The contract for this constructor and class is unspecified if
* the sequence indices are not a subset of {@code 0, 1, ..., (nSeq - 1)}.
*
* @param hap2Seq an array mapping haplotype index to sequence index
* @param nSeq the number of distinct sequence indices
* @throws IllegalArgumentException if {@code nSeq < 1}
* @throws NullPointerException if{@code hap2Seq == null}
*/
public HapToSeq(int[] hap2Seq, int nSeq) {
this.hap2Seq = IntArray.create(hap2Seq, 0, nSeq);
this.nSeq = nSeq;
}
/**
* Constructs a new {@code HapAlleles} instance from the specified data.
* The contract for this constructor and class is unspecified if
* the sequence indices are not a subset of {@code 0, 1, ..., (nSeq - 1)}.
*
* @param hap2Seq an array mapping haplotype index to sequence index
* @param nSeq the number of distinct sequence indices
* @throws NullPointerException if{@code hap2Seq == null}
*/
public HapToSeq(IntArray hap2Seq, int nSeq) {
this.hap2Seq = hap2Seq;
this.nSeq = nSeq;
}
/**
* Returns the number of haplotypes.
* @return the number of haplotypes
*/
public int nHaps() {
return hap2Seq.size();
}
/**
* Returns the number of sequences.
* @return the number of sequences
*/
public int nSeqs() {
return nSeq;
}
/**
* Returns the array mapping haplotype indices to sequence indices.
* @return the array mapping haplotype indices to sequence indices
*/
public IntArray hap2Seq() {
return hap2Seq;
}
}
......@@ -18,9 +18,10 @@
*/
package imp;
import blbutil.IntArray;
import ints.IntArray;
import ints.IndexArray;
import java.util.Arrays;
import vcf.PhasedGT;
import vcf.GT;
import vcf.RefGT;
import vcf.RefGTRec;
......@@ -38,7 +39,7 @@ public class HaplotypeCoder {
private final int nRefHaps;
private final int nHaps;
private final RefGT ref;
private final PhasedGT targ;
private final GT targ;
/**
* Constructs a new {@code HaplotypeCoder} instance from the specified
......@@ -51,7 +52,7 @@ public class HaplotypeCoder {
* @throws NullPointerException if
* {@code refHapPairs == null || targetHapPairs == null}
*/
public HaplotypeCoder(RefGT restrictRefGT, PhasedGT phasedTarg) {
public HaplotypeCoder(RefGT restrictRefGT, GT phasedTarg) {
if (restrictRefGT.markers().equals(phasedTarg.markers())==false) {
throw new IllegalArgumentException("inconsistent markers");
}
......@@ -65,7 +66,7 @@ public class HaplotypeCoder {
* Returns the phased reference genotypes at the target markers.
* @return the phased reference genotypes at the target markers
*/
public PhasedGT refHapPairs() {
public RefGT refHapPairs() {
return ref;
}
......@@ -73,7 +74,7 @@ public class HaplotypeCoder {
* Returns the phased target genotypes.
* @return the phased target genotypes
*/
public PhasedGT targHapPairs() {
public GT targHapPairs() {
return targ;
}
......@@ -93,7 +94,7 @@ public class HaplotypeCoder {
* @throws IndexOutOfBoundsException if
* {@code start < 0 || end >= this.refHapPairs.nMarkers()}
*/
public HapToSeq run(int start, int end) {
public IndexArray run(int start, int end) {
if (start >= end) {
throw new IllegalArgumentException("start >= end");
}
......@@ -111,10 +112,10 @@ public class HaplotypeCoder {
return false;
}
else {
IntArray hapToSeq = ref.get(start).hapToSeq();
IntArray hapToSeq = ref.get(start).map(0);
for (int m=start+1; m<end; ++m) {
RefGTRec rec = ref.get(m);
if (rec.isAlleleCoded() || rec.hapToSeq()!=hapToSeq) {
if (rec.isAlleleCoded() || rec.map(0)!=hapToSeq) {
return false;
}
}
......@@ -122,7 +123,7 @@ public class HaplotypeCoder {
}
}
private HapToSeq codeTarg(int start, int[][] seqMap) {
private IndexArray codeTarg(int start, int[][] seqMap) {
int nTargHaps = targ.nHaps();
int[] hapToSeq = new int[nTargHaps];
Arrays.fill(hapToSeq, 1);
......@@ -140,18 +141,24 @@ public class HaplotypeCoder {
hapToSeq[h] = seqMap[j][index];
}
}
return new HapToSeq(hapToSeq, seqCnt);
IntArray intArray = IntArray.create(hapToSeq, seqCnt);
return new IndexArray(intArray, seqCnt);
}
private HapToSeq codeSeqCodedRef(int start, int end) {
private IndexArray codeSeqCodedRef(int start, int end) {
int[][] seqMap = new int[end - start][];
HapToSeq codedTarg = codeTarg(start, seqMap);
int[] seq1ToSeq2 = new int[ref.get(start).seqToAllele().size()];
IndexArray codedTarg = codeTarg(start, seqMap);
RefGTRec rec = ref.get(start);
assert rec.nMaps()==2;
IntArray seqToAllele = rec.map(1);
int[] seq1ToSeq2 = new int[seqToAllele.size()];
Arrays.fill(seq1ToSeq2, 1);
for (int j=0; j<seqMap.length; ++j) {
int m = start + j;
int nAlleles = ref.marker(m).nAlleles();
IntArray seqToAllele = ref.get(m).seqToAllele();
rec = ref.get(m);
assert rec.nMaps()==2;
seqToAllele = rec.map(1);
for (int s=0; s<seq1ToSeq2.length; ++s) {
if (seq1ToSeq2[s]>0) {
int index = seq1ToSeq2[s]*nAlleles + seqToAllele.get(s);
......@@ -159,12 +166,12 @@ public class HaplotypeCoder {
}
}
}
return combine(ref.get(start).hapToSeq(), seq1ToSeq2, codedTarg);
return combine(ref.get(start).map(0), seq1ToSeq2, codedTarg);
}
private HapToSeq combine(IntArray refHapToSeq, int[] seq1ToSeq2,
HapToSeq codedTarg) {
IntArray targSeq = codedTarg.hap2Seq();
private IndexArray combine(IntArray refBasicIndexArray, int[] seq1ToSeq2,
IndexArray codedTarg) {
IntArray targSeq = codedTarg.intArray();
IntArray combined = new IntArray() {
@Override
......@@ -175,19 +182,19 @@ public class HaplotypeCoder {
@Override
public int get(int index) {
if (index < nRefHaps) {
return seq1ToSeq2[refHapToSeq.get(index)];
return seq1ToSeq2[refBasicIndexArray.get(index)];
}
else {
return targSeq.get(index - nRefHaps);
}
}
} ;
return new HapToSeq(combined, codedTarg.nSeqs());
return new IndexArray(combined, codedTarg.valueSize());
}
private HapToSeq codeSeq(int start, int end) {
private IndexArray codeSeq(int start, int end) {
int[][] seqMap = new int[end - start][];
HapToSeq codedTarg = codeTarg(start, seqMap);
IndexArray codedTarg = codeTarg(start, seqMap);
int[] codedRefHap = new int[nRefHaps];
Arrays.fill(codedRefHap, 1);
for (int j=0; j<seqMap.length; ++j) {
......@@ -203,8 +210,8 @@ public class HaplotypeCoder {
return combine(codedRefHap, codedTarg);
}
private HapToSeq combine(int[] codedRef, HapToSeq codedTarg) {
IntArray targSeq = codedTarg.hap2Seq();
private IndexArray combine(int[] codedRef, IndexArray codedTarg) {
IntArray targSeq = codedTarg.intArray();
IntArray combined = new IntArray() {
@Override
......@@ -222,6 +229,6 @@ public class HaplotypeCoder {
}
}
} ;
return new HapToSeq(combined, codedTarg.nSeqs());
return new IndexArray(combined, codedTarg.valueSize());
}
}
......@@ -19,15 +19,16 @@
package imp;
import beagleutil.Samples;
import blbutil.IntArray;
import blbutil.IntList;
import ints.IntArray;
import ints.IntList;
import ints.IndexArray;
import java.util.Arrays;
import java.util.stream.IntStream;
import main.CurrentData;
import vcf.GeneticMap;
import main.Par;
import vcf.GT;
import vcf.Markers;
import vcf.PhasedGT;
import vcf.RefGT;
import vcf.RefGTRec;
......@@ -47,11 +48,11 @@ public class ImpData {
private final Par par;
private final CurrentData cd;
private final RefGT refGT;
private final PhasedGT phasedTarg;
private final GT phasedTarg;
private final int[] targClustStartEnd;
private final int[] refClusterStart;
private final int[] refClusterEnd;
private final HapToSeq[] hapToSeq;
private final IndexArray[] hapToSeq;
private final float[] errProb;
private final double[] pos;
private final float[] pRecomb;
......@@ -72,16 +73,20 @@ public class ImpData {
* {@code cd.targMarkers().equals(phasedTarg.markers() == false}
* @throws IllegalArgumentException if
* {@code cd.targSamples().equals(phasedTarg.samples()) == false}
* @throws IllegalArgumentException if
* {@code phasedTarg.isPhased() == false}
* @throws NullPointerException if any parameter is {@code null}
*/
public ImpData(Par par, CurrentData cd, PhasedGT phasedTarg,
GeneticMap map) {
public ImpData(Par par, CurrentData cd, GT phasedTarg, GeneticMap map) {
if (cd.targMarkers().equals(phasedTarg.markers())==false) {
throw new IllegalArgumentException("inconsistent markers");
}
if (cd.targSamples().equals(phasedTarg.samples())==false) {
throw new IllegalArgumentException("inconsistent samples");
}
if (phasedTarg.isPhased() == false) {
throw new IllegalArgumentException("unphased data");
}
int[] targToRef = cd.markerIndices();
this.par = par;
this.cd = cd;
......@@ -123,7 +128,7 @@ public class ImpData {
int refIndex = targToRef[j];
RefGTRec rec = refGT.get(refIndex);
if (rec.isAlleleCoded()==false) {
IntArray hap2Seq = rec.hapToSeq();
IntArray hap2Seq = rec.map(0);
if (hap2Seq!=lastHap2Seq) {
if (lastHap2Seq!=null) {
intList.add(j);
......@@ -166,12 +171,12 @@ public class ImpData {
.toArray();
}
private static HapToSeq[] hapToSeq(RefGT restrictRef, PhasedGT phasedTarg,
private static IndexArray[] hapToSeq(RefGT restrictRef, GT phasedTarg,
int[] targStartEnd) {
HaplotypeCoder coder = new HaplotypeCoder(restrictRef, phasedTarg);
return IntStream.range(1, targStartEnd.length)
.mapToObj(j -> coder.run(targStartEnd[j-1], targStartEnd[j]))
.toArray(HapToSeq[]::new);
.toArray(IndexArray[]::new);
}
private static float[] err(float errRate, int[] startEnd) {
......@@ -254,10 +259,11 @@ public class ImpData {
}
/**
* Return the phased target genotype data
* @return the phased target genotype data;
* Return the phased target genotype data. The {@code isPhased()} method
* of the returned object returns {@code true}.
* @return the phased target genotype data
*/
public PhasedGT targGT() {
public GT targGT() {
return phasedTarg;
}
......@@ -384,7 +390,7 @@ public class ImpData {
* {@code haplotype < 0 || haplotype >= this.nHaps()}
*/
public int allele(int cluster, int hap) {
return hapToSeq[cluster].hap2Seq().get(hap);
return hapToSeq[cluster].get(hap);
}
/**
......@@ -402,7 +408,7 @@ public class ImpData {
* @throws IndexOutOfBoundsException if
* {@code cluster < 0 || cluster >= this.nClusters()}
*/
public HapToSeq hapToSeq(int cluster) {
public IndexArray hapToSeq(int cluster) {
return hapToSeq[cluster];
}
......
......@@ -18,9 +18,10 @@
*/
package imp;
import blbutil.IntArray;
import blbutil.IntList;
import ints.IntArray;
import ints.IntList;
import blbutil.Utilities;
import ints.IndexArray;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
......@@ -60,16 +61,15 @@ public final class ImpIbs {
this.seed = par.seed();
this.nRefHaps = impData.nRefHaps();
this.nStates = par.imp_states();
float step = par.step();
this.nSteps = par.nsteps();
int nStepsPerSegment = Math.round(par.imp_segment()/step);
int nStepsPerSegment = Math.round(par.imp_segment()/par.step());
this.nHapsPerStep = (par.imp_states() / nStepsPerSegment);
this.stepStarts = stepStarts(impData);
HapToSeq[] codedSteps = IntStream.range(0, stepStarts.length)
IndexArray[] codedSteps = IntStream.range(0, stepStarts.length)
.parallel()
.mapToObj(j -> codeStep(impData, stepStarts, j))
.toArray(HapToSeq[]::new);
.toArray(IndexArray[]::new);
this.ibsHaps = IntStream.range(0, codedSteps.length)
.parallel()
.mapToObj(j -> getIbsHaps(codedSteps, j))
......@@ -96,7 +96,7 @@ public final class ImpIbs {
return (nextIndex<0) ? -nextIndex-1 : nextIndex;
}
private static HapToSeq codeStep(ImpData impData, int[] starts, int startIndex) {
private static IndexArray codeStep(ImpData impData, int[] starts, int startIndex) {
int nRefHaps = impData.nRefHaps();
int nHaps = impData.nHaps();
int[] hapToSeq = IntStream.range(0, nHaps).map(i -> 1).toArray();
......@@ -105,9 +105,9 @@ public final class ImpIbs {
int seqCnt = 2; // seq 0 is reserved for sequences not found in target
for (int m=start; m<end; ++m) {
HapToSeq h2s = impData.hapToSeq(m);
IntArray codedHaps = h2s.hap2Seq();
int nAlleles = h2s.nSeqs();
IndexArray h2s = impData.hapToSeq(m);
IntArray codedHaps = h2s.intArray();
int nAlleles = h2s.valueSize();
int[] seqMap = new int[seqCnt*nAlleles];
seqCnt = 1;
......@@ -125,10 +125,11 @@ public final class ImpIbs {
}
}
}
return new HapToSeq(hapToSeq, seqCnt);
IntArray intArray = IntArray.create(hapToSeq, seqCnt);
return new IndexArray(intArray, seqCnt);
}
private int[][] getIbsHaps(HapToSeq[] codedSteps, int index) {
private int[][] getIbsHaps(IndexArray[] codedSteps, int index) {
int nTargHaps = impData.nTargHaps();
int[][] results = new int[nTargHaps][];
int nStepsToMerge = Math.min(nSteps, codedSteps.length - index);
......@@ -138,7 +139,7 @@ public final class ImpIbs {
for (int i=1; i<nStepsToMerge; ++i) {
int initCapacity = Math.min(nTargHaps, 2*lastIbs.size());
List<IntList> nextIbs = new ArrayList<>(initCapacity);
HapToSeq codedStep = codedSteps[index+i];
IndexArray codedStep = codedSteps[index+i];
for (int j=0, nj=lastIbs.size(); j<nj; ++j) {
IntList parent = lastIbs.get(j);
children = partition(parent, codedStep);
......@@ -150,9 +151,9 @@ public final class ImpIbs {
return results;
}
private List<IntList> initPartition(HapToSeq codedStep) {
IntList[] list = new IntList[codedStep.nSeqs()];
IntArray hap2Seq = codedStep.hap2Seq();
private List<IntList> initPartition(IndexArray codedStep) {
IntList[] list = new IntList[codedStep.valueSize()];
IntArray hap2Seq = codedStep.intArray();
int nHaps = hap2Seq.size();
List<IntList> children = new ArrayList<>();
for (int h=nRefHaps; h<nHaps; ++h) {
......@@ -171,9 +172,9 @@ public final class ImpIbs {
return children;
}
private List<IntList> partition(IntList parent, HapToSeq codedStep) {
IntList[] list = new IntList[codedStep.nSeqs()];
IntArray hap2Seq = codedStep.hap2Seq();
private List<IntList> partition(IntList parent, IndexArray codedStep) {
IntList[] list = new IntList[codedStep.valueSize()];
IntArray hap2Seq = codedStep.intArray();
int nHaps = parent.size();
List<IntList> children = new ArrayList<>();
int targStart = insertionPoint(parent, nRefHaps);
......
......@@ -18,8 +18,8 @@
*/
package imp;
import blbutil.IntList;
import blbutil.IntMap;
import ints.IntList;
import ints.IntMap;
/**
* <p>Class {@code ImpStates} identifies a list of pseudo-reference haplotypes
......
......@@ -19,7 +19,7 @@
package imp;
import blbutil.FloatList;
import blbutil.IntList;
import ints.IntList;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicReferenceArray;
......
......@@ -18,7 +18,7 @@
*/
package imp;
import blbutil.IntList;
import ints.IntList;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReferenceArray;
......@@ -139,7 +139,7 @@ public class RefHapHash {
alleleHash[a] = rand.nextInt();
}
for (int i=0; i<i2hap.length; ++i) {
int allele = rec.allele(i2hap[i]);
int allele = rec.get(i2hap[i]);
if (allele!=0) {
i2hash[i] += alleleHash[allele];
altAlleles[i].add(markerOffset);
......