Commits on Source (6)
......@@ -11,7 +11,20 @@ cache:
jdk:
- oraclejdk8
- openjdk8
script: ./gradlew test jacocoTestReport;
matrix:
fast_finish: true
allow_failures:
- env: TEST_TYPE=FTP
include:
- jdk: oraclejdk8
env: TEST_TYPE=FTP
script:
- if [[ $TEST_TYPE == "FTP" ]]; then
./gradlew testFTP jacocoTestReport;
else
./gradlew test jacocoTestReport;
fi
after_success:
- bash <(curl -s https://codecov.io/bash)
- echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'";
......
......@@ -10,9 +10,9 @@ plugins {
id 'maven'
id 'signing'
id 'jacoco'
id 'com.palantir.git-version' version '0.5.1'
id 'com.github.johnrengelman.shadow' version '1.2.3'
id 'com.github.maiflai.scalatest' version '0.15'
id 'com.palantir.git-version' version '0.11.0'
id 'com.github.johnrengelman.shadow' version '2.0.4'
id 'com.github.maiflai.scalatest' version '0.22'
}
repositories {
......@@ -36,12 +36,12 @@ dependencies {
compile "org.xerial.snappy:snappy-java:1.1.4"
compile "org.apache.commons:commons-compress:1.4.1"
compile "org.tukaani:xz:1.5"
compile "gov.nih.nlm.ncbi:ngs-java:1.2.4"
compile "gov.nih.nlm.ncbi:ngs-java:2.9.0"
testCompile "org.scala-lang:scala-library:2.12.1"
testCompile "org.scalatest:scalatest_2.12:3.0.1"
testRuntime 'org.pegdown:pegdown:1.4.2' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.9.9"
testCompile "org.scala-lang:scala-library:2.12.6"
testCompile "org.scalatest:scalatest_2.12:3.0.5"
testRuntime 'org.pegdown:pegdown:1.6.0' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.14.3"
testCompile "com.google.jimfs:jimfs:1.1"
}
......@@ -100,11 +100,22 @@ test {
tags {
exclude "slow"
exclude "broken"
exclude "ftp"
if (System.env.CI == "false") exclude "sra"
if (!OperatingSystem.current().isUnix()) exclude "unix"
}
} dependsOn findScalaAndJavaTypes
task testFTP(type: Test) {
description = "Runs the tests that require connection to a remote ftp server"
tags {
include "ftp"
exclude "slow"
exclude "broken"
}
}
task testSRA(type: Test) {
description = "Run the SRA tests"
jvmArgs += '-Dsamjdk.sra_libraries_download=true'
......@@ -115,9 +126,8 @@ task testSRA(type: Test) {
}
}
task wrapper(type: Wrapper) {
description = "Regenerate the gradle wrapper"
gradleVersion = '3.2.1'
wrapper {
gradleVersion = '4.8.1'
}
// This is a hack to disable the java 8 default javadoc lint until we fix the html formatting
......
htsjdk (2.16.1+dfsg-1) UNRELEASED; urgency=medium
* New upstream version
* debhelper 11
* Standards-Version: 4.2.1
-- Andreas Tille <tille@debian.org> Mon, 17 Sep 2018 16:01:03 +0200
htsjdk (2.14.3+dfsg-3) unstable; urgency=medium
* Reintroduce unit tests after jcommander fix
......
......@@ -10,7 +10,7 @@ Build-Depends: default-jdk (>= 2:1.9),
javahelper,
gradle-debian-helper,
maven-repo-helper,
debhelper (>= 10),
debhelper (>= 11~),
libcommons-jexl2-java,
libcommons-logging-java,
libsnappy-java,
......@@ -24,7 +24,7 @@ Build-Depends: default-jdk (>= 2:1.9),
junit4,
libjimfs-java,
scala-library
Standards-Version: 4.1.4
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/htsjdk
Vcs-Git: https://salsa.debian.org/med-team/htsjdk.git
Homepage: http://samtools.github.io/htsjdk/
......
......@@ -6,13 +6,13 @@ Last-Update: Wed, 29 Jun 2016 10:45:03 +0200
--- a/build.gradle
+++ b/build.gradle
@@ -10,9 +10,6 @@
@@ -10,9 +10,6 @@ plugins {
id 'maven'
id 'signing'
id 'jacoco'
- id 'com.palantir.git-version' version '0.5.1'
- id 'com.github.johnrengelman.shadow' version '1.2.3'
- id 'com.github.maiflai.scalatest' version '0.15'
- id 'com.palantir.git-version' version '0.11.0'
- id 'com.github.johnrengelman.shadow' version '2.0.4'
- id 'com.github.maiflai.scalatest' version '0.22'
}
repositories {
......@@ -17,28 +17,29 @@ Last-Updated: 2017-10-18
}
@@ -23,11 +23,11 @@ dependencies {
compile "org.tukaani:xz:1.5"
compile "gov.nih.nlm.ncbi:ngs-java:1.2.4"
compile "gov.nih.nlm.ncbi:ngs-java:2.9.0"
- testCompile "org.scala-lang:scala-library:2.12.1"
- testCompile "org.scalatest:scalatest_2.12:3.0.1"
- testRuntime 'org.pegdown:pegdown:1.4.2' // Necessary for generating HTML reports with ScalaTest
- testCompile "org.testng:testng:6.9.9"
- testCompile "org.scala-lang:scala-library:2.12.6"
- testCompile "org.scalatest:scalatest_2.12:3.0.5"
- testRuntime 'org.pegdown:pegdown:1.6.0' // Necessary for generating HTML reports with ScalaTest
- testCompile "org.testng:testng:6.14.3"
- testCompile "com.google.jimfs:jimfs:1.1"
+ testCompile "org.scala-lang:scala-library:2.11.x"
+ //testCompile "org.scalatest:scalatest_2.12:3.0.1"
+ //testRuntime 'org.pegdown:pegdown:1.4.2' // Necessary for generating HTML reports with ScalaTest
+ //testCompile "org.scalatest:scalatest_2.12:3.0.5"
+ //testRuntime 'org.pegdown:pegdown:1.6.0' // Necessary for generating HTML reports with ScalaTest
+ testCompile "org.testng:testng:debian"
+ testCompile "com.google.jimfs:jimfs:debian"
}
sourceCompatibility = 1.8
@@ -90,22 +90,20 @@ test {
@@ -90,33 +90,30 @@ test {
jvmArgs += '-Dsamjdk.sra_libraries_download=true'
}
- tags {
- exclude "slow"
- exclude "broken"
- exclude "ftp"
- if (System.env.CI == "false") exclude "sra"
- if (!OperatingSystem.current().isUnix()) exclude "unix"
- }
......@@ -48,6 +49,18 @@ Last-Updated: 2017-10-18
+
} dependsOn findScalaAndJavaTypes
task testFTP(type: Test) {
description = "Runs the tests that require connection to a remote ftp server"
tags {
- include "ftp"
+ exclude "ftp"
exclude "slow"
exclude "broken"
}
+
}
task testSRA(type: Test) {
description = "Run the SRA tests"
jvmArgs += '-Dsamjdk.sra_libraries_download=true'
......@@ -59,10 +72,9 @@ Last-Updated: 2017-10-18
+ exclude 'htsjdk/samtools/sra/**'
+ exclude 'htsjdk/samtools/BAMFileIndexTest.java'
+ exclude 'htsjdk/samtools/util/BlockCompressedOutputStreamTest.java'
+
}
task wrapper(type: Wrapper) {
wrapper {
--- a/src/test/java/htsjdk/HtsjdkTest.java
+++ b/src/test/java/htsjdk/HtsjdkTest.java
@@ -1,10 +1,9 @@
......
......@@ -6,7 +6,7 @@ Last-Updated: 17-04-2018
Forwarded: no
--- a/build.gradle
+++ b/build.gradle
@@ -115,6 +115,7 @@
@@ -124,6 +124,7 @@ wrapper {
if (JavaVersion.current().isJava8Compatible()) {
tasks.withType(Javadoc) {
options.addStringOption('Xdoclint:none', '-quiet')
......
......@@ -4,7 +4,7 @@ Description: Enable tests using testng
--- a/build.gradle
+++ b/build.gradle
@@ -86,9 +86,14 @@
@@ -86,9 +86,14 @@ test {
events "failed", "skipped"
}
......@@ -19,21 +19,3 @@ Description: Enable tests using testng
exclude 'htsjdk/samtools/sra/**'
exclude 'htsjdk/samtools/BAMFileIndexTest.java'
--- a/src/test/java/htsjdk/utils/ClassFinderTest.java
+++ b/src/test/java/htsjdk/utils/ClassFinderTest.java
@@ -1,7 +1,7 @@
package htsjdk.utils;
import htsjdk.HtsjdkTest;
-import org.junit.Assert;
+import org.testng.Assert;
import org.testng.annotations.Test;
/**
@@ -28,4 +28,4 @@
Assert.assertEquals(finder.getClasses().stream()
.filter(c -> c.getName().equals("htsjdk.utils.ClassFinder")).count(), 1);
}
-}
\ No newline at end of file
+}
#Fri Jan 20 17:10:11 EST 2017
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.8.1-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip
#!/usr/bin/env bash
#!/usr/bin/env sh
##############################################################################
##
......@@ -154,16 +154,19 @@ if $cygwin ; then
esac
fi
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
function splitJvmOpts() {
JVM_OPTS=("$@")
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
exec "$JAVACMD" "$@"
......@@ -40,7 +40,7 @@ import java.nio.file.Path;
/**
* Concrete implementation of SAMFileWriter for writing gzipped BAM files.
*/
class BAMFileWriter extends SAMFileWriterImpl {
public class BAMFileWriter extends SAMFileWriterImpl {
private final BinaryCodec outputBinaryCodec;
private BAMRecordCodec bamRecordCodec = null;
......@@ -78,7 +78,7 @@ class BAMFileWriter extends SAMFileWriterImpl {
}
protected BAMFileWriter(final OutputStream os, final String absoluteFilename, final int compressionLevel, final DeflaterFactory deflaterFactory) {
blockCompressedOutputStream = new BlockCompressedOutputStream(os, null, compressionLevel, deflaterFactory);
blockCompressedOutputStream = new BlockCompressedOutputStream(os, (Path)null, compressionLevel, deflaterFactory);
outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream);
outputBinaryCodec.setOutputFileName(absoluteFilename);
}
......@@ -200,8 +200,13 @@ class BAMFileWriter extends SAMFileWriterImpl {
writeHeader(outputBinaryCodec, samFileHeader, headerString);
}
protected static void writeHeader(final OutputStream outputStream, final SAMFileHeader samFileHeader) {
final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
/**
* Write a BAM file header to an output stream in block compressed BAM format.
* @param outputStream the stream to write the BAM header to
* @param samFileHeader the header to write
*/
public static void writeHeader(final OutputStream outputStream, final SAMFileHeader samFileHeader) {
final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, (Path)null);
final BinaryCodec outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream);
writeHeader(outputBinaryCodec, samFileHeader);
try {
......
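The hunk above makes writeHeader(OutputStream, SAMFileHeader) public. A minimal usage sketch (an editor's illustration, not part of the diff), assuming this htsjdk version is on the classpath:
import htsjdk.samtools.BAMFileWriter;
import htsjdk.samtools.SAMFileHeader;
import java.io.ByteArrayOutputStream;
public class WriteBamHeaderSketch {
    public static void main(String[] args) {
        final SAMFileHeader header = new SAMFileHeader();       // empty header, for illustration only
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        BAMFileWriter.writeHeader(out, header);                 // writes the header as block-compressed (BGZF) BAM bytes
        System.out.println(out.size() + " bytes written");
    }
}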
......@@ -28,6 +28,7 @@ import htsjdk.samtools.util.StringUtil;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import static htsjdk.samtools.SAMTag.CG;
/**
* Wrapper class for binary BAM records.
......@@ -39,6 +40,28 @@ public class BAMRecord extends SAMRecord {
*/
private static final int READ_NAME_OFFSET = 0;
/**
* Constant for converting between the number of operators in a Cigar and the number
* of bytes needed to represent it in the BAM format
*/
public static final short CIGAR_SIZE_MULTIPLIER = 4;
/**
* Maximum number of cigar operators that can be represented normally in the cigar part of the bam record.
* Records that have larger cigars will have their Cigars encoded to int[] and placed in the CG tag in the attributes (BAM only).
* This should happen upon encoding. In place of the Cigar, a sentinel value <READLENGTH>S<REFERENCELENGTH>N will be placed.
* <p>
* When a BAM record is decoded, the sentinel cigar signals the existence of the CG tag, which is decoded and removed.
* The sentinel value is then replaced with the actual cigar (in memory).
*/
public static final int MAX_CIGAR_OPERATORS = 0xffff;
public static final int MAX_CIGAR_ELEMENT_LENGTH = (1 << 28) - 1;
/**
* Number of operators in the "sentinel" cigar xSyN
*/
private static final int LONG_CIGAR_SENTINEL_LENGTH = 2;
/**
* Variable-length part of BAMRecord. Lazily decoded.
*/
......@@ -203,6 +226,12 @@ public class BAMRecord extends SAMRecord {
*/
@Override
public void clearAttributes() {
// If there's a long cigar, the CG tag might be "hiding" in the attributes, and
// if the original attributes haven't been parsed yet, we would lose the long cigar.
// By "getting" the cigar prior to clearing the attributes, we protect against that.
if (!mAttributesDecoded) {
getCigar();
}
mAttributesDecoded = true;
mBinaryDataStale = true;
super.clearAttributes();
......@@ -248,6 +277,11 @@ public class BAMRecord extends SAMRecord {
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
super.initializeCigar(BinaryCigarCodec.decode(byteBuffer));
mCigarDecoded = true;
if (getCigarLength() == LONG_CIGAR_SENTINEL_LENGTH && isSentinelCigar(super.getCigar(), getReadLength())) {
extractCigarFromCGAttribute(super.getCigar());
}
if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
// Don't know line number, and don't want to force read name to be decoded.
SAMUtils.processValidationErrors(validateCigar(-1L), -1, getValidationStringency());
......@@ -256,6 +290,72 @@ public class BAMRecord extends SAMRecord {
return super.getCigar();
}
/**
* Checks to see if the provided Cigar could be considered the "sentinel cigar" that indicates
* that the actual cigar is too long for the BAM spec and should be taken from the CG tag. This
* was introduced in SAM v1.6.
*/
static boolean isSentinelCigar(final Cigar cigar, final int readLength) {
// There's an implicit assumption here that readLength == length of the read in the cigar, unless readLength == 0
return cigar.numCigarElements() == 2 &&
cigar.getCigarElement(1).getOperator() == CigarOperator.N &&
cigar.getCigarElement(0).getOperator() == CigarOperator.S &&
(cigar.getCigarElement(0).getLength() == readLength || readLength == 0) ;
}
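For context, a hedged sketch (not from the diff) of the sentinel shape these checks accept, using a hypothetical read of length 100 spanning 250 reference bases:
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import java.util.Arrays;
public class SentinelCigarShapeSketch {
    public static void main(String[] args) {
        // xSyN: x = read length (soft clip), y = reference length (skip)
        final Cigar sentinel = new Cigar(Arrays.asList(
                new CigarElement(100, CigarOperator.S),
                new CigarElement(250, CigarOperator.N)));
        // Mirrors isSentinelCigar: two elements, S first, N second, S length equal to the read length
        final boolean looksLikeSentinel = sentinel.numCigarElements() == 2
                && sentinel.getCigarElement(0).getOperator() == CigarOperator.S
                && sentinel.getCigarElement(1).getOperator() == CigarOperator.N
                && sentinel.getCigarElement(0).getLength() == 100;
        System.out.println(looksLikeSentinel); // true
    }
}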
/**
* Long cigars (with more than 64K operators) cannot be encoded into BAM. Instead, a sentinel cigar is
* placed as a placeholder, and the actual cigar is placed in the CG tag. This method
* extracts the CIGAR from the CG tag and places it into the (in-memory) cigar.
*/
private void extractCigarFromCGAttribute(final Cigar sentinelCigar) throws IllegalStateException {
final int[] cigarFromCG = (int[]) getAttribute(SAMTagUtil.getSingleton().CG);
if (cigarFromCG == null) return;
// place the integer array into a buffer so we can decode it
final ByteBuffer byteBuffer = ByteBuffer.allocate(cigarFromCG.length * CIGAR_SIZE_MULTIPLIER)
.order(ByteOrder.LITTLE_ENDIAN);
byteBuffer.asIntBuffer().put(cigarFromCG);
// decode cigar
final Cigar decodedCigar = BinaryCigarCodec.decode(byteBuffer);
// Sanity check
if (decodedCigar.numCigarElements() <= MAX_CIGAR_OPERATORS) {
throw new IllegalStateException(String.format(
"Only Cigar with > %d operators should be placed in CG tag. Found %d operators. \n Here's the Cigar:\n%s",
MAX_CIGAR_OPERATORS,
decodedCigar.getCigarElements().size(),
decodedCigar.toString()));
}
if (decodedCigar.getReferenceLength() != sentinelCigar.getReferenceLength()) {
throw new IllegalStateException(String.format(
"Sentinel cigar and %s cigar should have the same reference length. Found %d and %d.\n Here's the Cigar:\n%s",
CG.name(),
sentinelCigar.getReferenceLength(),
decodedCigar.getReferenceLength(),
decodedCigar.toString()));
}
if (decodedCigar.getReadLength() != sentinelCigar.getReadLength() ) {
throw new IllegalStateException(String.format(
"Sentinel cigar and %s cigar should have the same read length. Found %d and %d.\n Here's the Cigar:\n%s",
CG.name(),
sentinelCigar.getReadLength(),
decodedCigar.getReadLength(),
decodedCigar.toString()));
}
// Use initializeCigar instead of setCigar so as not to clobber the indexingBin.
initializeCigar(decodedCigar);
// remove CG attribute.
setAttribute(SAMTagUtil.getSingleton().CG, null);
}
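The CG attribute stores the cigar as the same packed ints used in the BAM cigar block, which is what the ByteBuffer round-trip above decodes. A small sketch of that encoding as defined by the SAM/BAM spec (my illustration, not diff code): each int is (length << 4) | opCode, with "MIDNSHP=X" indexing op codes 0 through 8.
public class PackedCigarIntSketch {
    private static final String OPS = "MIDNSHP=X"; // BAM cigar operator codes 0..8
    public static void main(String[] args) {
        final int encoded = (100 << 4) | 4;        // hypothetical element: 100S (op code 4 = S)
        final int length = encoded >>> 4;          // upper 28 bits hold the element length
        final char op = OPS.charAt(encoded & 0xf); // lower 4 bits hold the operator code
        System.out.println("" + length + op);      // prints 100S
    }
}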
/**
* Avoids decoding CIGAR in order to get length.
*/
......@@ -308,11 +408,17 @@ public class BAMRecord extends SAMRecord {
if (mAttributesDecoded) {
return;
}
mAttributesDecoded = true;
final int tagsOffset = readNameSize() + cigarSize() + basesSize() + qualsSize();
final int tagsSize = mRestOfBinaryData.length - tagsOffset;
final SAMBinaryTagAndValue attributes = BinaryTagCodec.readTags(mRestOfBinaryData, tagsOffset, tagsSize, getValidationStringency());
setAttributes(attributes);
// If there's a CG tag, call getCigar() so that the CG tag can be converted into the in-memory CIGAR
if (hasAttribute(CG.name())) {
getCigar();
}
}
private byte[] decodeBaseQualities() {
......@@ -358,7 +464,7 @@ public class BAMRecord extends SAMRecord {
}
private int cigarSize() {
return mCigarLength * 4;
return mCigarLength * CIGAR_SIZE_MULTIPLIER;
}
private int basesSize() {
......
......@@ -24,6 +24,7 @@
package htsjdk.samtools;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeEOFException;
import htsjdk.samtools.util.SortingCollection;
......@@ -31,15 +32,21 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import static htsjdk.samtools.SAMTag.CG;
/**
* Class for translating between in-memory and disk representation of BAMRecord.
*/
public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
private final static Log LOG = Log.getInstance(BAMRecordCodec.class);
private final SAMFileHeader header;
private final BinaryCodec binaryCodec = new BinaryCodec();
private final BinaryTagCodec binaryTagCodec = new BinaryTagCodec(binaryCodec);
private final SAMRecordFactory samRecordFactory;
private boolean isReferenceSizeWarningShowed = false;
public BAMRecordCodec(final SAMFileHeader header) {
this(header, new DefaultSAMRecordFactory());
}
......@@ -55,26 +62,33 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
return new BAMRecordCodec(this.header, this.samRecordFactory);
}
/** Sets the output stream that records will be written to. */
/**
* Sets the output stream that records will be written to.
*/
@Override
public void setOutputStream(final OutputStream os) {
this.binaryCodec.setOutputStream(os);
}
/** Sets the output stream that records will be written to. */
/**
* Sets the output stream that records will be written to.
*/
public void setOutputStream(final OutputStream os, final String filename) {
this.binaryCodec.setOutputStream(os);
this.binaryCodec.setOutputFileName(filename);
}
/** Sets the input stream that records will be read from. */
/**
* Sets the input stream that records will be read from.
*/
@Override
public void setInputStream(final InputStream is) {
this.binaryCodec.setInputStream(is);
}
/** Sets the input stream that records will be read from. */
/**
* Sets the input stream that records will be read from.
*/
public void setInputStream(final InputStream is, final String filename) {
this.binaryCodec.setInputStream(is);
this.binaryCodec.setInputFileName(filename);
......@@ -93,10 +107,19 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
// Compute block size, as it is the first element of the file representation of SAMRecord
final int readLength = alignment.getReadLength();
// if cigar is too long, put into CG tag and replace with sentinel value
if (alignment.getCigarLength() > BAMRecord.MAX_CIGAR_OPERATORS) {
final int[] cigarEncoding = BinaryCigarCodec.encode(alignment.getCigar());
alignment.setCigar(makeSentinelCigar(alignment.getCigar()));
alignment.setAttribute(CG.name(), cigarEncoding);
}
// do not combine with previous call to alignment.getCigarLength() as cigar may change in-between
final int cigarLength = alignment.getCigarLength();
int blockSize = BAMFileConstants.FIXED_BLOCK_SIZE + alignment.getReadNameLength() + 1 + // null terminated
cigarLength * 4 +
cigarLength * BAMRecord.CIGAR_SIZE_MULTIPLIER +
(readLength + 1) / 2 + // 2 bases per byte, round up
readLength;
......@@ -112,13 +135,12 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
}
}
// shouldn't interact with the long-cigar above since the Sentinel Cigar has the same referenceLength as
// the actual cigar.
int indexBin = 0;
if (alignment.getReferenceIndex() >= 0) {
if (alignment.getIndexingBin() != null) {
indexBin = alignment.getIndexingBin();
} else {
indexBin = alignment.computeIndexingBin();
}
if (alignment.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) {
warnIfReferenceIsTooLargeForBinField(alignment);
indexBin = alignment.computeIndexingBinIfAbsent(alignment);
}
// Blurt out the elements
......@@ -151,7 +173,7 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
final int[] binaryCigar = BinaryCigarCodec.encode(alignment.getCigar());
for (final int cigarElement : binaryCigar) {
// Assumption that this will fit into an integer, despite the fact
// that it is specced as a uint.
// that it is spec'ed as a uint.
this.binaryCodec.writeInt(cigarElement);
}
try {
......@@ -174,6 +196,44 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
}
}
/**
* Create a "sentinel" cigar that will be placed in the BAM file when the actual cigar has more than 0xffff operators,
* which the BAM format cannot represent. The actual cigar will be encoded and placed in the CG attribute.
* @param cigar actual cigar to create sentinel cigar for
* @return sentinel cigar xSyN with readLength (x) and referenceLength (y) matching the input cigar.
*/
public static Cigar makeSentinelCigar(final Cigar cigar) {
// In BAM there are only 28 bits for a cigar element's length, so this is a protection against overflow.
if (cigar.getReadLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) {
throw new IllegalArgumentException(
String.format(
"Cannot encode (to BAM) a record with more than %d cigar operations and a read-length greater than %d.",
BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH));
}
if (cigar.getReferenceLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) {
throw new IllegalArgumentException(
String.format(
"Cannot encode (to BAM) a record that has than %d cigar operations and spans more than %d bases on the reference.",
BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH));
}
return new Cigar(Arrays.asList(
new CigarElement(cigar.getReadLength(), CigarOperator.S),
new CigarElement(cigar.getReferenceLength(), CigarOperator.N)));
}
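A usage sketch (hypothetical, using htsjdk's public TextCigarCodec): 50M10D50M reads 100 bases and spans 110 reference bases, so its sentinel comes out as 100S110N.
import htsjdk.samtools.BAMRecordCodec;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.TextCigarCodec;
public class MakeSentinelCigarSketch {
    public static void main(String[] args) {
        final Cigar real = TextCigarCodec.decode("50M10D50M"); // read length 100, reference length 110
        final Cigar sentinel = BAMRecordCodec.makeSentinelCigar(real);
        System.out.println(sentinel); // 100S110N
    }
}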
private void warnIfReferenceIsTooLargeForBinField(final SAMRecord rec) {
final SAMSequenceRecord sequence = rec.getHeader() != null ? rec.getHeader().getSequence(rec.getReferenceName()) : null;
if (!isReferenceSizeWarningShowed
&& sequence != null
&& SAMUtils.isReferenceSequenceCompatibleWithBAI(sequence)
&& rec.getValidationStringency() != ValidationStringency.SILENT) {
LOG.warn("Reference length is too large for BAM bin field. Values in the bin field could be incorrect.");
isReferenceSizeWarningShowed = true;
}
}
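The warning exists because the BAI binning scheme only covers positions below 2^29, so bins computed for longer references are unreliable. A one-liner stating the limit (a spec fact, not code from the diff):
public class BaiBinLimitSketch {
    public static void main(String[] args) {
        final int baiGenomicSpan = 1 << 29; // 536,870,912: BAI bins cover positions in [0, 2^29)
        System.out.println("References longer than " + (baiGenomicSpan - 1) + " bp overflow the BAM bin field");
    }
}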
/**
* Read the next record from the input stream and convert into a java object.
*
......@@ -182,11 +242,10 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
*/
@Override
public SAMRecord decode() {
int recordLength = 0;
final int recordLength;
try {
recordLength = this.binaryCodec.readInt();
}
catch (RuntimeEOFException e) {
} catch (RuntimeEOFException e) {
return null;
}
......
......@@ -15,6 +15,7 @@ import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.List;
public class BamFileIoUtils {
......@@ -86,7 +87,7 @@ public class BamFileIoUtils {
// If we found the end of the header then write the remainder of this block out as a
// new gzip block and then break out of the while loop
if (remainingInBlock >= 0) {
final BlockCompressedOutputStream blockOut = new BlockCompressedOutputStream(outputStream, null);
final BlockCompressedOutputStream blockOut = new BlockCompressedOutputStream(outputStream, (Path)null);
IOUtil.transferByStream(blockIn, blockOut, remainingInBlock);
blockOut.flush();
// Don't close blockOut because closing underlying stream would break everything
......
......@@ -25,6 +25,7 @@
package htsjdk.samtools;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.IOUtil;
import java.io.File;
import java.io.IOException;
......@@ -47,7 +48,7 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
* @param output BAM Index output file
*/
public BinaryBAMIndexWriter(final int nRef, final File output) {
this(nRef, null == output ? null : output.toPath());
this(nRef, IOUtil.toPath(output));
}
/**
......
......@@ -466,6 +466,32 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
iterator.setFileSource(enabled ? reader : null);
}
/**
* Prepare to iterate through SAMRecords that match the intersection of the given intervals and chunk boundaries.
* @param intervals the intervals to restrict reads to
* @param contained if <code>true</code>, return records that are strictly
* contained in the intervals, otherwise return records that overlap
* @param filePointers file pointer pairs corresponding to chunk boundaries for the
* intervals
*/
public CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
final boolean contained,
final long[] filePointers) {
return new CRAMIntervalIterator(intervals, contained, filePointers);
}
// convert queries -> merged BAMFileSpan -> coordinate array
private static long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterval[] queries) {
ArrayList<BAMFileSpan> spanList = new ArrayList<>(1);
Arrays.asList(queries).forEach(qi -> spanList.add(index.getSpanOverlapping(qi.referenceIndex, qi.start, qi.end)));
BAMFileSpan[] spanArray = new BAMFileSpan[spanList.size()];
for (int i = 0; i < spanList.size(); i++) {
spanArray[i] = spanList.get(i);
}
return BAMFileSpan.merge(spanArray).toCoordinateArray();
}
private class CRAMIntervalIterator extends BAMQueryMultipleIntervalsIteratorFilter
implements CloseableIterator<SAMRecord> {
......@@ -475,9 +501,12 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
SAMRecord nextRec = null;
public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contained) {
this(queries, contained, coordinatesFromQueryIntervals(getIndex(), queries));
}
public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contained, final long[] coordinates) {
super(queries, contained);
long[] coordinates = coordinatesFromQueryIntervals(getIndex(), queries);
if (coordinates != null && coordinates.length != 0) {
try {
unfilteredIterator = new CRAMIterator(
......@@ -493,18 +522,6 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
}
}
// convert queries -> merged BAMFileSpan -> coordinate array
private long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterval[] queries) {
ArrayList<BAMFileSpan> spanList = new ArrayList<>(1);
Arrays.asList(queries).forEach(qi -> spanList.add(mIndex.getSpanOverlapping(qi.referenceIndex, qi.start, qi.end)));
BAMFileSpan spanArray[] = new BAMFileSpan[spanList.size()];
for (int i = 0; i < spanList.size(); i++) {
spanArray[i] = spanList.get(i);
}
return BAMFileSpan.merge(spanArray).toCoordinateArray();
}
@Override
public void close() {
if (unfilteredIterator != null) {
......
......@@ -116,8 +116,12 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
@Override
protected void writeHeader(final String textHeader) {
cramContainerStream.writeHeader(
new SAMTextHeaderCodec().decode(BufferedLineReader.fromString(textHeader),fileName != null ? fileName : null));
writeHeader(new SAMTextHeaderCodec().decode(BufferedLineReader.fromString(textHeader),fileName != null ? fileName : null));
}
@Override
protected void writeHeader(final SAMFileHeader header) {
cramContainerStream.writeHeader(header);
}
@Override
......
......@@ -29,7 +29,6 @@ import java.io.File;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.WeakHashMap;
/**
* Class for reading BAM file indices, caching each contig as it's loaded and
......@@ -37,8 +36,13 @@ import java.util.WeakHashMap;
*/
class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex
{
private Integer mLastReferenceRetrieved = null;
private final WeakHashMap<Integer,BAMIndexContent> mQueriesByReference = new WeakHashMap<Integer,BAMIndexContent>();
// Since null is a valid return value for this index, it's possible to have lastReferenceIndex != null and
// lastReference == null; this effectively caches a null return value
private Integer lastReferenceIndex = null;
private BAMIndexContent lastReference = null;
private long cacheHits = 0;
private long cacheMisses = 0;
public CachingBAMFileIndex(final File file, final SAMSequenceDictionary dictionary) {
super(file, dictionary);
......@@ -110,7 +114,7 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
final int firstLocusInBin = getFirstLocusInBin(bin);
// Add the specified bin to the tree if it exists.
final List<Bin> binTree = new ArrayList<Bin>();
final List<Bin> binTree = new ArrayList<>();
if(indexQuery.containsBin(bin))
binTree.add(indexQuery.getBins().getBin(bin.getBinNumber()));
......@@ -139,35 +143,31 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
* Looks up the cached BAM query results if they're still in the cache. Otherwise,
* retrieves the results from disk.
* @param referenceIndex The reference to load. CachingBAMFileIndex only stores index data for entire references.
* @return The index information for this reference.
* @return The index information for this reference or null if no index information is available for the given index.
*/
@Override
protected BAMIndexContent getQueryResults(final int referenceIndex) {
// WeakHashMap is a bit weird in that its lookups are done via equals() equality, but expirations must be
// handled via == equality. This implementation jumps through a few hoops to make sure that == equality still
// holds even in the context of boxing/unboxing.
// If this query is for the same reference index as the last query, return it.
if(mLastReferenceRetrieved!=null && mLastReferenceRetrieved == referenceIndex)
return mQueriesByReference.get(referenceIndex);
// If not, check to see whether it's available in the cache.
BAMIndexContent queryResults = mQueriesByReference.get(referenceIndex);
if(queryResults != null) {
mLastReferenceRetrieved = referenceIndex;
mQueriesByReference.put(referenceIndex,queryResults);
return queryResults;
// This compares a boxed Integer to an int with == which is ok because the Integer will be unboxed to the primitive value
if(lastReferenceIndex!=null && lastReferenceIndex == referenceIndex){
cacheHits++;
return lastReference;
}
// If not in the cache, attempt to load it from disk.
queryResults = query(referenceIndex,1,-1);
if(queryResults != null) {
mLastReferenceRetrieved = referenceIndex;
mQueriesByReference.put(referenceIndex,queryResults);
return queryResults;
// If not, attempt to load it from disk.
final BAMIndexContent queryResults = query(referenceIndex,1,-1);
cacheMisses++;
lastReferenceIndex = referenceIndex;
lastReference = queryResults;
return lastReference;
}
// Not even available on disk.
return null;
public long getCacheHits() {
return cacheHits;
}
public long getCacheMisses() {
return cacheMisses;
}
}
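The rewrite above replaces the WeakHashMap with a single-slot cache that counts hits and misses and can legitimately cache a null result. A generic sketch of the same pattern (illustrative names, not htsjdk API):
import java.util.function.IntFunction;
// Single-slot cache keyed by int: remember the last key and its value (which may be null),
// counting hits and misses, as in the rewritten getQueryResults.
class LastSlotCache<V> {
    private Integer lastKey = null; // null means nothing cached yet
    private V lastValue = null;     // may itself be a cached null result
    private long hits = 0, misses = 0;
    private final IntFunction<V> loader;
    LastSlotCache(final IntFunction<V> loader) { this.loader = loader; }
    V get(final int key) {
        if (lastKey != null && lastKey == key) { // the Integer unboxes, so == compares values
            hits++;
            return lastValue;
        }
        misses++;
        lastKey = key;
        lastValue = loader.apply(key); // load from the backing store on a miss
        return lastValue;
    }
    long getHits() { return hits; }
    long getMisses() { return misses; }
}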