Commits on Source (7)
htsjdk (2.18.0+dfsg-1) UNRELEASED; urgency=medium
htsjdk (2.18.2+dfsg-1) UNRELEASED; urgency=medium
* New upstream version
TODO: Fix or drop failing tests
* New upstream version
* debhelper 12
* Standards-Version: 4.3.0
* Remove trailing whitespace in debian/changelog
-- Andreas Tille <tille@debian.org> Sun, 02 Dec 2018 11:18:29 +0100
-- Andreas Tille <tille@debian.org> Thu, 24 Jan 2019 17:31:54 +0100
htsjdk (2.16.1+dfsg-3) unstable; urgency=medium
......
......@@ -10,7 +10,7 @@ Build-Depends: default-jdk (>= 2:1.9),
javahelper,
gradle-debian-helper,
maven-repo-helper,
debhelper (>= 11~),
debhelper (>= 12~),
libcommons-jexl2-java,
libcommons-logging-java,
libjaxb-api-java,
......@@ -26,7 +26,7 @@ Build-Depends: default-jdk (>= 2:1.9),
junit4,
libjimfs-java,
scala-library
Standards-Version: 4.2.1
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/htsjdk
Vcs-Git: https://salsa.debian.org/med-team/htsjdk.git
Homepage: http://samtools.github.io/htsjdk/
......
......@@ -337,6 +337,15 @@ public class BAMFileReader extends SamReader.ReaderImplementation {
return offset;
}
/**
* Reads through the header and sequence records to find the virtual file offset of the first record in the BAM file.
* The caller is responsible for closing the stream.
*/
static long findVirtualOffsetOfFirstRecord(final SeekableStream seekableStream) throws IOException {
final BAMFileReader reader = new BAMFileReader(seekableStream, (SeekableStream) null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
return reader.mFirstRecordPointer;
}
/**
* If true, writes the source of every read into the source SAMRecords.
* @param enabled true to write source information into each SAMRecord.
......@@ -944,6 +953,14 @@ public class BAMFileReader extends SamReader.ReaderImplementation {
return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained));
}
/**
* @return a virtual file pointer for the underlying compressed stream.
* @see BlockCompressedInputStream#getFilePointer()
*/
public long getVirtualFilePointer() {
return mCompressedInputStream.getFilePointer();
}
/**
* Iterate over the SAMRecords defined by the sections of the file described in the ctor argument.
*/
......
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.cram.io.InputStreamUtils;
import htsjdk.samtools.seekablestream.SeekablePathStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.IOUtil;
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;
/**
* Writes SBI files for BAM files, as understood by {@link SBIIndex}.
*/
public final class BAMSBIIndexer {
/**
* Perform indexing on the given BAM file, at the granularity level specified.
*
* @param bamFile the path to the BAM file
* @param granularity write the offset of every n-th alignment to the index
* @throws IOException as per java IO contract
*/
public static void createIndex(final Path bamFile, final long granularity) throws IOException {
final Path splittingBaiFile = IOUtil.addExtension(bamFile, SBIIndex.FILE_EXTENSION);
try (SeekableStream in = new SeekablePathStream(bamFile); OutputStream out = Files.newOutputStream(splittingBaiFile)) {
createIndex(in, out, granularity);
}
}
/**
* Perform indexing on the given BAM file, at the granularity level specified.
*
* @param in a seekable stream for reading the BAM file from
* @param out the stream to write the index to
* @param granularity write the offset of every n-th alignment to the index
* @throws IOException as per java IO contract
*/
public static void createIndex(final SeekableStream in, final OutputStream out, final long granularity) throws IOException {
long recordStart = SAMUtils.findVirtualOffsetOfFirstRecordInBam(in);
try (BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in)) {
blockIn.seek(recordStart);
// Create a buffer for reading the BAM record lengths. BAM is little-endian.
final ByteBuffer byteBuffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN);
final SBIIndexWriter indexWriter = new SBIIndexWriter(out, granularity);
while (true) {
try {
recordStart = blockIn.getFilePointer();
// Read the length of the remainder of the BAM record (`block_size` in the SAM spec)
InputStreamUtils.readFully(blockIn, byteBuffer.array(), 0, 4);
final int blockSize = byteBuffer.getInt(0);
// Process the record start position, then skip to the start of the next BAM record
indexWriter.processRecord(recordStart);
InputStreamUtils.skipFully(blockIn, blockSize);
} catch (EOFException e) {
break;
}
}
indexWriter.finish(recordStart, in.length());
}
}
}
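A minimal usage sketch of the indexer added above; the BAM path is an illustrative placeholder, and the granularity reuses the writer's default. Since createIndex(Path, long) appends SBIIndex.FILE_EXTENSION, the index lands next to the BAM as example.bam.sbi.

import htsjdk.samtools.BAMSBIIndexer;
import htsjdk.samtools.SBIIndexWriter;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

public class CreateSbiExample {
    public static void main(final String[] args) throws IOException {
        // Hypothetical input BAM; replace with a real path.
        final Path bam = Paths.get("example.bam");
        // Writes example.bam.sbi, recording the virtual offset of every
        // 4096th alignment (the writer's default granularity).
        BAMSBIIndexer.createIndex(bam, SBIIndexWriter.DEFAULT_GRANULARITY);
    }
}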
......@@ -41,18 +41,18 @@ import htsjdk.samtools.cram.CRAMException;
public class CRAMIterator implements SAMRecordIterator {
private static final Log log = Log.getInstance(CRAMIterator.class);
private final CountingInputStream countingInputStream;
private CramHeader cramHeader;
private ArrayList<SAMRecord> records;
private final CramHeader cramHeader;
private final ArrayList<SAMRecord> records;
private SAMRecord nextRecord = null;
private CramNormalizer normalizer;
private final CramNormalizer normalizer;
private byte[] refs;
private int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
public Container container;
private SamReader mReader;
long firstContainerOffset = 0;
private Iterator<Container> containerIterator;
private final Iterator<Container> containerIterator;
private ContainerParser parser;
private final ContainerParser parser;
private final CRAMReferenceSource referenceSource;
private Iterator<SAMRecord> iterator = Collections.<SAMRecord>emptyList().iterator();
......@@ -68,6 +68,10 @@ public class CRAMIterator implements SAMRecordIterator {
this.validationStringency = validationStringency;
}
/**
* `samRecordIndex` is only used when validation is not `SILENT`,
* so the validator can report which records are invalid.
*/
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;
......@@ -84,7 +88,7 @@ public class CRAMIterator implements SAMRecordIterator {
this.containerIterator = containerIterator;
firstContainerOffset = this.countingInputStream.getCount();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
......@@ -103,7 +107,7 @@ public class CRAMIterator implements SAMRecordIterator {
this.containerIterator = containerIterator;
firstContainerOffset = containerIterator.getFirstContainerOffset();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
......@@ -143,9 +147,6 @@ public class CRAMIterator implements SAMRecordIterator {
}
}
if (records == null)
records = new ArrayList<SAMRecord>(container.nofRecords);
else
records.clear();
if (cramRecords == null)
cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
......@@ -172,8 +173,10 @@ public class CRAMIterator implements SAMRecordIterator {
for (int i = 0; i < container.slices.length; i++) {
final Slice slice = container.slices[i];
if (slice.sequenceId < 0)
continue;
if (!slice.validateRefMD5(refs)) {
final String msg = String.format(
"Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s",
......@@ -201,12 +204,6 @@ public class CRAMIterator implements SAMRecordIterator {
samRecord.setValidationStringency(validationStringency);
if (validationStringency != ValidationStringency.SILENT) {
final List<SAMValidationError> validationErrors = samRecord.isValid();
SAMUtils.processValidationErrors(validationErrors,
samRecordIndex, validationStringency);
}
if (mReader != null) {
final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex;
final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1;
......@@ -215,7 +212,6 @@ public class CRAMIterator implements SAMRecordIterator {
}
records.add(samRecord);
samRecordIndex++;
}
cramRecords.clear();
iterator = records.iterator();
......@@ -267,7 +263,15 @@ public class CRAMIterator implements SAMRecordIterator {
@Override
public SAMRecord next() {
if (hasNext()) {
return iterator.next();
SAMRecord samRecord = iterator.next();
if (validationStringency != ValidationStringency.SILENT) {
SAMUtils.processValidationErrors(samRecord.isValid(), samRecordIndex++, validationStringency);
}
return samRecord;
} else {
throw new NoSuchElementException();
}
......
......@@ -24,6 +24,8 @@
package htsjdk.samtools;
import htsjdk.variant.variantcontext.VariantContext;
import java.math.BigInteger;
import java.net.URI;
import java.net.URISyntaxException;
......@@ -57,23 +59,27 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
/**
* This is not a valid sequence name, because it is reserved in the MRNM field of SAM text format
* This is not a valid sequence name, because it is reserved in the RNEXT field of SAM text format
* to mean "same reference as RNAME field."
*/
public static final String RESERVED_MRNM_SEQUENCE_NAME = "=";
public static final String RESERVED_RNEXT_SEQUENCE_NAME = "=";
/* use RESERVED_RNEXT_SEQUENCE_NAME instead. */
@Deprecated
public static final String RESERVED_MRNM_SEQUENCE_NAME = RESERVED_RNEXT_SEQUENCE_NAME;
/**
* The standard tags are stored in the text header without type information, because the type of these tags is known.
*/
public static final Set<String> STANDARD_TAGS =
new HashSet<String>(Arrays.asList(SEQUENCE_NAME_TAG, SEQUENCE_LENGTH_TAG, ASSEMBLY_TAG, MD5_TAG, URI_TAG,
SPECIES_TAG));
new HashSet<>(Arrays.asList(SEQUENCE_NAME_TAG, SEQUENCE_LENGTH_TAG, ASSEMBLY_TAG, MD5_TAG, URI_TAG, SPECIES_TAG));
// Split on any whitespace
private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("\\s");
// These are the chars matched by \\s.
private static final char[] WHITESPACE_CHARS = {' ', '\t', '\n', '\013', '\f', '\r'}; // \013 is vertical tab
private static final Pattern LEGAL_RNAME_PATTERN = Pattern.compile("[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*");
/**
* @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.
* sequenceLength is required for the object to be considered valid.
......@@ -85,9 +91,6 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
public SAMSequenceRecord(final String name, final int sequenceLength) {
if (name != null) {
if (SEQUENCE_NAME_SPLITTER.matcher(name).find()) {
throw new SAMException("Sequence name contains invalid character: " + name);
}
validateSequenceName(name);
mSequenceName = name.intern();
} else {
......@@ -188,8 +191,8 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
public static String truncateSequenceName(final String sequenceName) {
/*
* Instead of using regex split, do it manually for better performance.
return SEQUENCE_NAME_SPLITTER.split(sequenceName, 2)[0];
*/
int truncateAt = sequenceName.length();
for (final char c : WHITESPACE_CHARS) {
int index = sequenceName.indexOf(c);
......@@ -204,8 +207,8 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
* Throw an exception if the sequence name is not valid.
*/
public static void validateSequenceName(final String name) {
if (RESERVED_MRNM_SEQUENCE_NAME.equals(name)) {
throw new SAMException("'" + RESERVED_MRNM_SEQUENCE_NAME + "' is not a valid sequence name");
if (!LEGAL_RNAME_PATTERN.matcher(name).useAnchoringBounds(true).matches()) {
throw new SAMException(String.format("Sequence name '%s' doesn't match regex: '%s' ", name, LEGAL_RNAME_PATTERN));
}
}
......
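A short sketch of the stricter validation introduced above, using illustrative sequence names: any name that fails the SAM RNAME pattern now throws a SAMException, not just the reserved "=" name.

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceRecord;

public class ValidateNameExample {
    public static void main(final String[] args) {
        // Accepted: matches LEGAL_RNAME_PATTERN.
        SAMSequenceRecord.validateSequenceName("chr1");

        // Rejected: '=' is reserved for RNEXT, '*' may not start a name,
        // and whitespace is not permitted anywhere in a name.
        for (final String bad : new String[]{"=", "*chr1", "chr 1"}) {
            try {
                SAMSequenceRecord.validateSequenceName(bad);
            } catch (final SAMException e) {
                System.out.println("Rejected: " + bad);
            }
        }
    }
}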
......@@ -23,6 +23,7 @@
*/
package htsjdk.samtools;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.CigarUtil;
import htsjdk.samtools.util.CloserUtil;
......@@ -685,6 +686,18 @@ public final class SAMUtils {
}
}
/**
* Returns the virtual file offset of the first record in a BAM file - i.e. the virtual file
* offset after skipping over the text header and the sequence records.
*/
public static long findVirtualOffsetOfFirstRecordInBam(final SeekableStream seekableStream) {
try {
return BAMFileReader.findVirtualOffsetOfFirstRecord(seekableStream);
} catch (final IOException ioe) {
throw new RuntimeEOFException(ioe);
}
}
/**
* Given a Cigar, Returns blocks of the sequence that have been aligned directly to the
* reference sequence. Note that clipped portions, and inserted and deleted bases (vs. the reference)
......
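A minimal sketch of the new SAMUtils helper, assuming an illustrative local BAM path; note the returned value is a BGZF virtual file pointer, not a plain byte offset.

import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.seekablestream.SeekablePathStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;

import java.io.IOException;
import java.nio.file.Paths;

public class FirstRecordOffsetExample {
    public static void main(final String[] args) throws IOException {
        try (SeekableStream in = new SeekablePathStream(Paths.get("example.bam"))) {
            // Virtual offset of the first alignment, i.e. the position after
            // the text header and sequence records have been skipped.
            final long virtualOffset = SAMUtils.findVirtualOffsetOfFirstRecordInBam(in);
            System.out.println(BlockCompressedFilePointerUtil.asString(virtualOffset));
        }
    }
}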
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.TreeSet;
import java.util.stream.Collectors;
/**
* SBI is an index into BGZF-compressed data files, which has an entry for the file position of the start of every
* <i>n</i>th record. Reads files that were created by {@link BAMSBIIndexer}.
*/
public final class SBIIndex implements Serializable {
public static class Header implements Serializable {
private final long fileLength;
private final byte[] md5;
private final byte[] uuid;
private final long totalNumberOfRecords;
private final long granularity;
public Header(long fileLength, byte[] md5, byte[] uuid, long totalNumberOfRecords, long granularity) {
this.fileLength = fileLength;
this.md5 = md5;
this.uuid = uuid;
this.totalNumberOfRecords = totalNumberOfRecords;
this.granularity = granularity;
}
public long getFileLength() {
return fileLength;
}
public byte[] getMd5() {
return md5;
}
public byte[] getUuid() {
return uuid;
}
public long getTotalNumberOfRecords() {
return totalNumberOfRecords;
}
public long getGranularity() {
return granularity;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final Header header = (Header) o;
return fileLength == header.fileLength &&
totalNumberOfRecords == header.totalNumberOfRecords &&
granularity == header.granularity &&
Arrays.equals(md5, header.md5) &&
Arrays.equals(uuid, header.uuid);
}
@Override
public int hashCode() {
int result = Objects.hash(fileLength, totalNumberOfRecords, granularity);
result = 31 * result + Arrays.hashCode(md5);
result = 31 * result + Arrays.hashCode(uuid);
return result;
}
@Override
public String toString() {
return "Header{" +
"fileLength=" + fileLength +
", md5=" + Arrays.toString(md5) +
", uuid=" + Arrays.toString(uuid) +
", totalNumberOfRecords=" + totalNumberOfRecords +
", granularity=" + granularity +
'}';
}
}
public static final String FILE_EXTENSION = ".sbi";
/**
* SBI magic number.
*/
static final byte[] SBI_MAGIC = "SBI\1".getBytes();
private final Header header;
private final long[] virtualOffsets;
/**
* Create an in-memory SBI with the given virtual offsets.
* @param virtualOffsets the offsets in the index
*/
public SBIIndex(final Header header, final long[] virtualOffsets) {
this.header = header;
this.virtualOffsets = virtualOffsets;
if (this.virtualOffsets.length == 0) {
throw new RuntimeException("Invalid SBI format: should contain at least one offset");
}
}
/**
* Load an SBI into memory from a path.
* @param path the path to the SBI file
* @throws IOException as per java IO contract
*/
public static SBIIndex load(final Path path) throws IOException {
try (InputStream in = new BufferedInputStream(Files.newInputStream(path))) {
return readIndex(in);
}
}
/**
* Load an SBI into memory from a stream.
* @param in the stream to read the SBI from
*/
public static SBIIndex load(final InputStream in) {
return readIndex(in);
}
private static SBIIndex readIndex(final InputStream in) {
final BinaryCodec binaryCodec = new BinaryCodec(in);
final Header header = readHeader(binaryCodec);
final long numOffsetsLong = binaryCodec.readLong();
if (numOffsetsLong > Integer.MAX_VALUE) {
throw new RuntimeException(String.format("Cannot read SBI with more than %s offsets.", Integer.MAX_VALUE));
}
final int numOffsets = (int) numOffsetsLong;
final long[] virtualOffsets = new long[numOffsets];
long prev = -1;
for (int i = 0; i < numOffsets; i++) {
final long cur = binaryCodec.readLong();
if (prev > cur) {
throw new RuntimeException(String.format(
"Invalid SBI; offsets not in order: %#x > %#x",
prev, cur));
}
virtualOffsets[i] = cur;
prev = cur;
}
return new SBIIndex(header, virtualOffsets);
}
private static Header readHeader(final BinaryCodec binaryCodec) {
final byte[] buffer = new byte[SBI_MAGIC.length];
binaryCodec.readBytes(buffer);
if (!Arrays.equals(buffer, SBI_MAGIC)) {
throw new RuntimeException("Invalid file header in SBI: " + new String(buffer) + " (" + Arrays.toString(buffer) + ")");
}
final long fileLength = binaryCodec.readLong();
final byte[] md5 = new byte[16];
binaryCodec.readBytes(md5);
final byte[] uuid = new byte[16];
binaryCodec.readBytes(uuid);
final long totalNumberOfRecords = binaryCodec.readLong();
final long granularity = binaryCodec.readLong();
return new Header(fileLength, md5, uuid, totalNumberOfRecords, granularity);
}
/**
* Returns the index header.
*
* @return the header
*/
public Header getHeader() {
return header;
}
/**
* Returns the granularity of the index, that is the number of alignments between subsequent entries in the index,
* or zero if not specified.
* @return the granularity of the index
*/
public long getGranularity() {
return header.getGranularity();
}
/**
* Returns the entries in the index.
*
* @return an array of file pointers for all the alignment offsets in the index, in ascending order. The last
* virtual file pointer is the position at which the next record would start if it were added to the file.
*/
public long[] getVirtualOffsets() {
return virtualOffsets;
}
/**
* Returns number of entries in the index.
*
* @return the number of virtual offsets in the index
*/
public long size() {
return virtualOffsets.length;
}
/**
* Returns the length of the data file in bytes.
*
* @return the length of the data file in bytes
*/
public long dataFileLength() {
return header.getFileLength();
}
/**
* Split the data file for this index into non-overlapping chunks of roughly the given size that cover the whole
* file and that can be read independently of one another.
*
* @param splitSize the rough size of each split in bytes
* @return a list of contiguous, non-overlapping, sorted chunks that cover the whole data file
* @see #getChunk(long, long)
*/
public List<Chunk> split(final long splitSize) {
if (splitSize <= 0) {
throw new IllegalArgumentException(String.format("Split size must be positive: %s", splitSize));
}
final long fileSize = dataFileLength();
final List<Chunk> chunks = new ArrayList<>();
for (long splitStart = 0; splitStart < fileSize; splitStart += splitSize) {
final Chunk chunk = getChunk(splitStart, splitStart + splitSize);
if (chunk != null) {
chunks.add(chunk);
}
}
return chunks;
}
/**
* Return a chunk that corresponds to the given range in the data file. Note that the chunk does not necessarily
* completely cover the given range, however this method will map a set of contiguous, non-overlapping file ranges
* that cover the whole data file to a set of contiguous, non-overlapping chunks that cover the whole data file.
*
* @param splitStart the start of the file range (inclusive)
* @param splitEnd the end of the file range (exclusive)
* @return a chunk whose virtual start is at the first alignment start position that is greater than or equal to the
* given split start position, and whose virtual end is at the first alignment start position that is greater than
* or equal to the given split end position, or null if the chunk would be empty.
* @see #split(long)
*/
public Chunk getChunk(final long splitStart, final long splitEnd) {
if (splitStart >= splitEnd) {
throw new IllegalArgumentException(String.format("Split start (%s) must be less than end (%s)", splitStart, splitEnd));
}
final long lastVirtualOffset = virtualOffsets[virtualOffsets.length - 1];
final long maxEnd = BlockCompressedFilePointerUtil.getBlockAddress(lastVirtualOffset);
final long actualSplitStart = Math.min(splitStart, maxEnd);
final long actualSplitEnd = Math.min(splitEnd, maxEnd);
final long virtualSplitStart = BlockCompressedFilePointerUtil.makeFilePointer(actualSplitStart);
final long virtualSplitEnd = BlockCompressedFilePointerUtil.makeFilePointer(actualSplitEnd);
final long virtualSplitStartAlignment = ceiling(virtualSplitStart);
final long virtualSplitEndAlignment = ceiling(virtualSplitEnd);
if (virtualSplitStartAlignment == virtualSplitEndAlignment) {
return null;
}
return new Chunk(virtualSplitStartAlignment, virtualSplitEndAlignment);
}
private long ceiling(final long virtualOffset) {
int index = Arrays.binarySearch(virtualOffsets, virtualOffset);
if (index < 0) {
index = -index - 1;
if (index == virtualOffsets.length) {
long lastVirtualOffset = virtualOffsets[virtualOffsets.length - 1];
throw new IllegalArgumentException(String.format("No virtual offset found for virtual file pointer %s, last virtual offset %s",
BlockCompressedFilePointerUtil.asString(virtualOffset), BlockCompressedFilePointerUtil.asString(lastVirtualOffset)));
}
}
return virtualOffsets[index];
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final SBIIndex sbiIndex = (SBIIndex) o;
return Objects.equals(header, sbiIndex.header) &&
Arrays.equals(virtualOffsets, sbiIndex.virtualOffsets);
}
@Override
public int hashCode() {
int result = Objects.hash(header);
result = 31 * result + Arrays.hashCode(virtualOffsets);
return result;
}
@Override
public String toString() {
String virtualOffsetsString;
if (virtualOffsets.length > 30) {
virtualOffsetsString = Arrays.toString(Arrays.copyOfRange(virtualOffsets, 0, 30)).replace("]", ", ...]");
} else {
virtualOffsetsString = Arrays.toString(virtualOffsets);
}
return "SBIIndex{" +
"header=" + header +
", numVirtualOffsets=" + virtualOffsets.length +
", virtualOffsets=" + virtualOffsetsString +
'}';
}
}
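A sketch of reading the index back and splitting the data file; the .sbi path and split size are illustrative. Each returned chunk starts and ends on record boundaries taken from the index, so the splits can be read independently of one another.

import htsjdk.samtools.Chunk;
import htsjdk.samtools.SBIIndex;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;

public class SplitBamExample {
    public static void main(final String[] args) throws IOException {
        // Hypothetical index created earlier by BAMSBIIndexer.createIndex.
        final SBIIndex index = SBIIndex.load(Paths.get("example.bam.sbi"));

        // Carve the data file into ~128 MiB splits.
        final List<Chunk> chunks = index.split(128 * 1024 * 1024);
        for (final Chunk chunk : chunks) {
            System.out.println(BlockCompressedFilePointerUtil.asString(chunk.getChunkStart())
                    + " - " + BlockCompressedFilePointerUtil.asString(chunk.getChunkEnd()));
        }
    }
}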
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
/**
* Writes SBI files as understood by {@link SBIIndex}.
* <p>
* To use this class, first construct an instance from an output stream, and a desired granularity. Then for each
* record in the file being indexed, pass the virtual file offset of the record to the {@link #processRecord} method.
* The indexer keeps a count of the records passed in and writes the offset of every <i>n</i>th record to the index.
* When there are no records left, call {@link #finish} to complete writing the index.
*/
public final class SBIIndexWriter {
// Default to a granularity level of 4096. This is generally sufficient
// for very large BAM files, relative to a maximum heap size in the
// gigabyte range.
public static final long DEFAULT_GRANULARITY = 4096;
static final byte[] EMPTY_MD5 = new byte[16];
static final byte[] EMPTY_UUID = new byte[16];
private final OutputStream out;
private final long granularity;
private final Path tempOffsetsFile;
private final BinaryCodec tempOffsetsCodec;
private long prev = -1;
private long recordCount;
private long virtualOffsetCount;
/**
* Prepare to write an SBI index with the default granularity.
*
* @param out the stream to write the index to
*/
public SBIIndexWriter(final OutputStream out) {
this(out, SBIIndexWriter.DEFAULT_GRANULARITY);
}
/**
* Prepare to write an SBI index.
*
* @param out the stream to write the index to
* @param granularity write the offset of every <i>n</i>th record to the index
*/
public SBIIndexWriter(final OutputStream out, final long granularity) {
this.out = out;
this.granularity = granularity;
try {
// Write the offsets to a temporary file, then write the entire file contents to the output stream at
// the end, once we know the number of offsets. This is more efficient than using a List<Long> for very
// large numbers of offsets (e.g. 10^8, which is possible for low granularity), since the list resizing
// operation is slow.
this.tempOffsetsFile = Files.createTempFile("offsets-", ".headerless.sbi");
this.tempOffsetsCodec = new BinaryCodec(new BufferedOutputStream(Files.newOutputStream(tempOffsetsFile)));
} catch (IOException e) {
throw new RuntimeIOException(e);
}
}
/**
* Process a record for the index: the offset of every <i>n</i>th record will be written to the index.
*
* @param virtualOffset virtual file pointer of the record
*/
public void processRecord(final long virtualOffset) {
if (recordCount++ % granularity == 0) {
writeVirtualOffset(virtualOffset);
}
}
void writeVirtualOffset(final long virtualOffset) {
if (prev > virtualOffset) {
throw new IllegalArgumentException(String.format(
"Offsets not in order: %#x > %#x",
prev, virtualOffset));
}
tempOffsetsCodec.writeLong(virtualOffset);
virtualOffsetCount++;
prev = virtualOffset;
}
/**
* Complete the index, and close the output stream.
*
* @param finalVirtualOffset the virtual offset at which the next record would start if it were added to the file
* @param dataFileLength the length of the data file in bytes
*/
public void finish(final long finalVirtualOffset, final long dataFileLength) {
finish(finalVirtualOffset, dataFileLength, null, null);
}
/**
* Complete the index, and close the output stream.
*
* @param finalVirtualOffset the virtual offset at which the next record would start if it were added to the file
* @param dataFileLength the length of the data file in bytes
* @param md5 the MD5 hash of the data file, or null if not specified
* @param uuid the UUID for the data file, or null if not specified
*/
public void finish(final long finalVirtualOffset, final long dataFileLength, final byte[] md5, final byte[] uuid) {
if (md5 != null && md5.length != 16) {
throw new IllegalArgumentException("Invalid MD5 length: " + md5.length);
}
if (uuid != null && uuid.length != 16) {
throw new IllegalArgumentException("Invalid UUID length: " + uuid.length);
}
final SBIIndex.Header header = new SBIIndex.Header(dataFileLength, md5 == null ? EMPTY_MD5 : md5, uuid == null ? EMPTY_UUID : uuid, recordCount, granularity);
finish(header, finalVirtualOffset);
}
void finish(final SBIIndex.Header header, final long finalVirtualOffset) {
// complete writing the temp offsets file
writeVirtualOffset(finalVirtualOffset);
tempOffsetsCodec.close();
try (BinaryCodec binaryCodec = new BinaryCodec(out);
InputStream tempOffsets = new BufferedInputStream(Files.newInputStream(tempOffsetsFile))) {
// header
binaryCodec.writeBytes(SBIIndex.SBI_MAGIC);
binaryCodec.writeLong(header.getFileLength());
binaryCodec.writeBytes(header.getMd5());
binaryCodec.writeBytes(header.getUuid());
binaryCodec.writeLong(header.getTotalNumberOfRecords());
binaryCodec.writeLong(header.getGranularity());
binaryCodec.writeLong(virtualOffsetCount);
// offsets
IOUtil.copyStream(tempOffsets, out);
} catch (IOException e) {
throw new RuntimeIOException(e);
} finally {
try {
Files.delete(tempOffsetsFile);
} catch (IOException e) {
throw new RuntimeIOException(e);
}
}
}
}
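A sketch of the writer protocol described in the class comment: construct with an output stream and granularity, call processRecord once per record, then finish. The offsets below are synthetic, increasing placeholders built with BlockCompressedFilePointerUtil, not real BAM record positions.

import htsjdk.samtools.SBIIndexWriter;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class WriteSbiExample {
    public static void main(final String[] args) throws IOException {
        try (OutputStream out = Files.newOutputStream(Paths.get("example.bam.sbi"))) {
            // Index every 100th record instead of the default 4096.
            final SBIIndexWriter writer = new SBIIndexWriter(out, 100);

            // In real use these would be the BGZF virtual file pointers of
            // successive BAM records; here they are synthetic and increasing.
            for (int record = 0; record < 1000; record++) {
                writer.processRecord(BlockCompressedFilePointerUtil.makeFilePointer(record * 64L));
            }

            // finalVirtualOffset is where the next record would start;
            // dataFileLength is the data file size in bytes (placeholders here).
            writer.finish(BlockCompressedFilePointerUtil.makeFilePointer(1000 * 64L), 1000 * 64L);
        }
    }
}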
......@@ -399,7 +399,7 @@ public abstract class SamReaderFactory {
}
} else if (BlockCompressedInputStream.isValidFile(bufferedStream)) {
primitiveSamReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory);
} else if (SamStreams.isGzippedSAMFile(bufferedStream)) {
} else if (IOUtil.isGZIPInputStream(bufferedStream)) {
primitiveSamReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory);
} else if (SamStreams.isCRAMFile(bufferedStream)) {
if (referenceSource == null) {
......
......@@ -4,6 +4,7 @@ import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedStreamConstants;
import htsjdk.samtools.util.IOUtil;
import java.io.ByteArrayInputStream;
import java.io.IOException;
......@@ -11,7 +12,6 @@ import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
/**
* Utilities related to processing of {@link java.io.InputStream}s encoding SAM data
......@@ -66,26 +66,11 @@ public class SamStreams {
/**
* Checks whether the file is a gzipped sam file. Returns true if it
* is and false otherwise.
* @deprecated use {@link IOUtil#isGZIPInputStream(InputStream)} instead
*/
@Deprecated
public static boolean isGzippedSAMFile(final InputStream stream) {
if (!stream.markSupported()) {
throw new IllegalArgumentException("Cannot test a stream that doesn't support marking.");
}
stream.mark(8000);
try {
final GZIPInputStream gunzip = new GZIPInputStream(stream);
final int ch = gunzip.read();
return true;
} catch (final IOException ioe) {
return false;
} finally {
try {
stream.reset();
} catch (final IOException ioe) {
throw new IllegalStateException("Could not reset stream.");
}
}
return IOUtil.isGZIPInputStream(stream);
}
// It's too expensive to examine the remote file to determine type.
......
......@@ -29,7 +29,9 @@ import java.nio.channels.FileChannel;
/**
* @author alecw@broadinstitute.org
* @deprecated This is deprecated with no replacement. 1/19
*/
@Deprecated
public class TimeChannel {
public static void main(String[] args) throws Exception {
long fileSize = new File(args[0]).length();
......
......@@ -28,7 +28,9 @@ import java.io.RandomAccessFile;
/**
* @author alecw@broadinstitute.org
* @deprecated This is deprecated with no replacement. 1/19
*/
@Deprecated
public class TimeRandomAccessFile {
public static void main(String[] args) throws Exception {
RandomAccessFile raf = new RandomAccessFile(new File(args[0]), "r");
......
......@@ -18,13 +18,14 @@
package htsjdk.samtools.cram.build;
import htsjdk.samtools.cram.common.MutableInt;
import htsjdk.samtools.cram.encoding.ByteArrayLenEncoding;
import htsjdk.samtools.cram.encoding.ByteArrayStopEncoding;
import htsjdk.samtools.cram.encoding.ExternalByteEncoding;
import htsjdk.samtools.cram.encoding.ExternalCompressor;
import htsjdk.samtools.cram.encoding.ExternalIntegerEncoding;
import htsjdk.samtools.cram.encoding.huffman.codec.CanonicalHuffmanIntegerEncoding;
import htsjdk.samtools.cram.encoding.rans.RANS;
import htsjdk.samtools.cram.compression.ExternalCompressor;
import htsjdk.samtools.cram.encoding.*;
import htsjdk.samtools.cram.encoding.core.CanonicalHuffmanIntegerEncoding;
import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalByteArrayEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalIntegerEncoding;
import htsjdk.samtools.cram.compression.rans.RANS;
import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
import htsjdk.samtools.cram.encoding.readfeatures.Substitution;
import htsjdk.samtools.cram.structure.CompressionHeader;
......@@ -52,15 +53,13 @@ import java.util.TreeMap;
* This particular version relies heavily on GZIP and RANS for better compression.
*/
public class CompressionHeaderFactory {
private static final int TAG_VALUE_BUFFER_SIZE = 1024 * 1024;
public static final int BYTE_SPACE_SIZE = 256;
public static final int ALL_BYTES_USED = -1;
private final Map<Integer, EncodingDetails> bestEncodings = new HashMap<>();
private final ByteArrayOutputStream baosForTagValues;
public CompressionHeaderFactory() {
baosForTagValues = new ByteArrayOutputStream(TAG_VALUE_BUFFER_SIZE);
}
// a parameter for Huffman encoding, so we don't have to re-construct on each call
private static final int[] singleZero = new int[] { 0 };
private final Map<Integer, EncodingDetails> bestEncodings = new HashMap<>();
private final ByteArrayOutputStream baosForTagValues = new ByteArrayOutputStream(1024 * 1024);
/**
* Decides on compression methods to use for the given records.
......@@ -110,7 +109,7 @@ public class CompressionHeaderFactory {
builder.addExternalIntegerGzipEncoding(DataSeries.TC_TagCount);
builder.addExternalIntegerEncoding(DataSeries.TL_TagIdList, ExternalCompressor.createGZIP());
builder.addExternalIntegerGzipEncoding(DataSeries.TN_TagNameAndType);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.TS_InsetSize);
builder.addExternalIntegerRansOrderOneEncoding(DataSeries.TS_InsertSize);
builder.setTagIdDictionary(buildTagIdDictionary(records));
......@@ -348,7 +347,7 @@ public class CompressionHeaderFactory {
return baosForTagValues.toByteArray();
}
static ByteSizeRange geByteSizeRangeOfTagValues(final List<CramCompressionRecord> records, final int tagID) {
static ByteSizeRange getByteSizeRangeOfTagValues(final List<CramCompressionRecord> records, final int tagID) {
final byte type = getTagType(tagID);
final ByteSizeRange stats = new ByteSizeRange();
for (final CramCompressionRecord record : records) {
......@@ -401,13 +400,25 @@ public class CompressionHeaderFactory {
EncodingParams params;
}
/**
* Used by buildEncodingForTag to create a ByteArrayLenEncoding with CanonicalHuffmanIntegerEncoding and
* ExternalByteArrayEncoding sub-encodings
*
* @param tagValueSize the size of the tag value, to be Huffman encoded
* @param tagID the ID of the tag
* @return EncodingParams describing the resulting encoding
*/
private EncodingParams buildTagEncodingForSize(final int tagValueSize, final int tagID) {
return new ByteArrayLenEncoding(
new CanonicalHuffmanIntegerEncoding(new int[] { tagValueSize }, singleZero),
new ExternalByteArrayEncoding(tagID)).toParam();
}
/**
* Build an encoding for a specific tag for given records.
*
* @param records
* CRAM records holding the tags
* @param tagID
* an integer id of the tag
* @param records CRAM records holding the tags
* @param tagID an integer id of the tag
* @return an encoding for the tag
*/
private EncodingDetails buildEncodingForTag(final List<CramCompressionRecord> records, final int tagID) {
......@@ -421,37 +432,31 @@ public class CompressionHeaderFactory {
case 'A':
case 'c':
case 'C':
details.params = ByteArrayLenEncoding.toParam(
CanonicalHuffmanIntegerEncoding.toParam(new int[] { 1 }, new int[] { 0 }),
ExternalByteEncoding.toParam(tagID));
details.params = buildTagEncodingForSize(1, tagID);
return details;
case 'I':
case 'i':
case 'f':
details.params = ByteArrayLenEncoding.toParam(
CanonicalHuffmanIntegerEncoding.toParam(new int[] { 4 }, new int[] { 0 }),
ExternalByteEncoding.toParam(tagID));
details.params = buildTagEncodingForSize(4, tagID);
return details;
case 's':
case 'S':
details.params = ByteArrayLenEncoding.toParam(
CanonicalHuffmanIntegerEncoding.toParam(new int[] { 2 }, new int[] { 0 }),
ExternalByteEncoding.toParam(tagID));
details.params = buildTagEncodingForSize(2, tagID);
return details;
case 'Z':
case 'B':
final ByteSizeRange stats = geByteSizeRangeOfTagValues(records, tagID);
final ByteSizeRange stats = getByteSizeRangeOfTagValues(records, tagID);
final boolean singleSize = stats.min == stats.max;
if (singleSize) {
details.params = ByteArrayLenEncoding.toParam(
CanonicalHuffmanIntegerEncoding.toParam(new int[] { stats.min }, new int[] { 0 }),
ExternalByteEncoding.toParam(tagID));
details.params = buildTagEncodingForSize(stats.min, tagID);
return details;
}
if (type == 'Z') {
details.params = ByteArrayStopEncoding.toParam((byte) '\t', tagID);
details.params = new ByteArrayStopEncoding((byte) '\t', tagID).toParam();
return details;
}
......@@ -459,13 +464,14 @@ public class CompressionHeaderFactory {
if (stats.min > minSize_threshold_ForByteArrayStopEncoding) {
final int unusedByte = getUnusedByte(data);
if (unusedByte > ALL_BYTES_USED) {
details.params = ByteArrayStopEncoding.toParam((byte) unusedByte, tagID);
details.params = new ByteArrayStopEncoding((byte) unusedByte, tagID).toParam();
return details;
}
}
details.params = ByteArrayLenEncoding.toParam(ExternalIntegerEncoding.toParam(tagID),
ExternalByteEncoding.toParam(tagID));
details.params = new ByteArrayLenEncoding(
new ExternalIntegerEncoding(tagID),
new ExternalByteArrayEncoding(tagID)).toParam();
return details;
default:
throw new IllegalArgumentException("Unknown tag type: " + (char) type);
......@@ -502,31 +508,31 @@ public class CompressionHeaderFactory {
private void addExternalByteArrayStopTabGzipEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
ByteArrayStopEncoding.toParam((byte) '\t', dataSeries.getExternalBlockContentId()),
new ByteArrayStopEncoding((byte) '\t', dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createGZIP());
}
private void addExternalIntegerEncoding(final DataSeries dataSeries, final ExternalCompressor compressor) {
addExternalEncoding(dataSeries,
ExternalIntegerEncoding.toParam(dataSeries.getExternalBlockContentId()),
new ExternalIntegerEncoding(dataSeries.getExternalBlockContentId()).toParam(),
compressor);
}
private void addExternalIntegerGzipEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
ExternalIntegerEncoding.toParam(dataSeries.getExternalBlockContentId()),
new ExternalIntegerEncoding(dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createGZIP());
}
private void addExternalByteGzipEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
ExternalByteEncoding.toParam(dataSeries.getExternalBlockContentId()),
new ExternalByteEncoding(dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createGZIP());
}
private void addExternalByteRansOrderOneEncoding(final DataSeries dataSeries) {
addExternalEncoding(dataSeries,
ExternalByteEncoding.toParam(dataSeries.getExternalBlockContentId()),
new ExternalByteEncoding(dataSeries.getExternalBlockContentId()).toParam(),
ExternalCompressor.createRANS(RANS.ORDER.ONE));
}
......
......@@ -19,19 +19,22 @@ package htsjdk.samtools.cram.build;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.digest.ContentDigests;
import htsjdk.samtools.cram.encoding.ExternalCompressor;
import htsjdk.samtools.cram.compression.ExternalCompressor;
import htsjdk.samtools.cram.encoding.writer.CramRecordWriter;
import htsjdk.samtools.cram.io.DefaultBitOutputStream;
import htsjdk.samtools.cram.io.ExposedByteArrayOutputStream;
import htsjdk.samtools.cram.structure.Block;
import htsjdk.samtools.cram.structure.BlockContentType;
import htsjdk.samtools.cram.structure.block.ExternalBlock;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.CompressionHeader;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.cram.structure.SubstitutionMatrix;
import htsjdk.samtools.util.RuntimeIOException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
......@@ -49,16 +52,12 @@ public class ContainerFactory {
this.recordsPerSlice = recordsPerSlice;
}
public Container buildContainer(final List<CramCompressionRecord> records)
throws IllegalArgumentException, IllegalAccessException,
IOException {
public Container buildContainer(final List<CramCompressionRecord> records) {
return buildContainer(records, null);
}
Container buildContainer(final List<CramCompressionRecord> records,
final SubstitutionMatrix substitutionMatrix)
throws IllegalArgumentException, IllegalAccessException,
IOException {
final SubstitutionMatrix substitutionMatrix) {
// sets header APDelta
final boolean coordinateSorted = samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
......@@ -114,17 +113,12 @@ public class ContainerFactory {
}
private static Slice buildSlice(final List<CramCompressionRecord> records,
final CompressionHeader header)
throws IllegalArgumentException, IllegalAccessException,
IOException {
final Map<Integer, ExposedByteArrayOutputStream> map = new HashMap<Integer, ExposedByteArrayOutputStream>();
final CompressionHeader header) {
final Map<Integer, ByteArrayOutputStream> externalBlockMap = new HashMap<>();
for (final int id : header.externalIds) {
map.put(id, new ExposedByteArrayOutputStream());
externalBlockMap.put(id, new ByteArrayOutputStream());
}
final ExposedByteArrayOutputStream bitBAOS = new ExposedByteArrayOutputStream();
final DefaultBitOutputStream bitOutputStream = new DefaultBitOutputStream(bitBAOS);
final Slice slice = new Slice();
slice.nofRecords = records.size();
......@@ -165,26 +159,32 @@ public class ContainerFactory {
slice.alignmentSpan = maxAlEnd - minAlStart + 1;
}
final CramRecordWriter writer = new CramRecordWriter(bitOutputStream, map, header, slice.sequenceId);
try (final ByteArrayOutputStream bitBAOS = new ByteArrayOutputStream();
final DefaultBitOutputStream bitOutputStream = new DefaultBitOutputStream(bitBAOS)) {
final CramRecordWriter writer = new CramRecordWriter(bitOutputStream, externalBlockMap, header, slice.sequenceId);
writer.writeCramCompressionRecords(records, slice.alignmentStart);
bitOutputStream.close();
slice.coreBlock = Block.buildNewCore(bitBAOS.toByteArray());
slice.external = new HashMap<Integer, Block>();
for (final Integer key : map.keySet()) {
final ExposedByteArrayOutputStream os = map.get(key);
final Block externalBlock = new Block();
externalBlock.setContentId(key);
externalBlock.setContentType(BlockContentType.EXTERNAL);
final ExternalCompressor compressor = header.externalCompressors.get(key);
final byte[] rawData = os.toByteArray();
final byte[] compressed = compressor.compress(rawData);
externalBlock.setContent(rawData, compressed);
externalBlock.setMethod(compressor.getMethod());
slice.external.put(key, externalBlock);
slice.coreBlock = Block.createRawCoreDataBlock(bitBAOS.toByteArray());
}
catch (final IOException e) {
throw new RuntimeIOException(e);
}
slice.external = new HashMap<>();
for (final Integer contentId : externalBlockMap.keySet()) {
// remove after https://github.com/samtools/htsjdk/issues/1232
if (contentId == Block.NO_CONTENT_ID) {
throw new CRAMException("Valid Content ID required. Given: " + contentId);
}
final ExternalCompressor compressor = header.externalCompressors.get(contentId);
final byte[] rawContent = externalBlockMap.get(contentId).toByteArray();
final ExternalBlock externalBlock = new ExternalBlock(compressor.getMethod(), contentId,
compressor.compress(rawContent), rawContent.length);
slice.external.put(contentId, externalBlock);
}
return slice;
......
......@@ -24,7 +24,7 @@ import htsjdk.samtools.cram.common.Version;
import htsjdk.samtools.cram.io.CountingInputStream;
import htsjdk.samtools.cram.io.ExposedByteArrayOutputStream;
import htsjdk.samtools.cram.io.InputStreamUtils;
import htsjdk.samtools.cram.structure.Block;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.CramHeader;
......@@ -248,7 +248,7 @@ public class CramIO {
final int length = Math.max(1024, data.length + data.length / 2);
final byte[] blockContent = new byte[length];
System.arraycopy(data, 0, blockContent, 0, Math.min(data.length, length));
final Block block = Block.buildNewFileHeaderBlock(blockContent);
final Block block = Block.createRawFileHeaderBlock(blockContent);
final Container container = new Container();
container.blockCount = 1;
......@@ -279,18 +279,18 @@ public class CramIO {
if (version.compatibleWith(CramVersions.CRAM_v3)) {
final byte[] bytes = new byte[container.containerByteSize];
InputStreamUtils.readFully(inputStream, bytes, 0, bytes.length);
block = Block.readFromInputStream(version.major, new ByteArrayInputStream(bytes));
block = Block.read(version.major, new ByteArrayInputStream(bytes));
// ignore the rest of the container
} else {
/*
* pending issue: container.containerByteSize is 2 bytes shorter
* than needed in the v21 test cram files.
*/
block = Block.readFromInputStream(version.major, inputStream);
block = Block.read(version.major, inputStream);
}
}
inputStream = new ByteArrayInputStream(block.getRawContent());
inputStream = new ByteArrayInputStream(block.getUncompressedContent());
final ByteBuffer buffer = ByteBuffer.allocate(4);
buffer.order(ByteOrder.LITTLE_ENDIAN);
......@@ -327,7 +327,7 @@ public class CramIO {
final long pos = countingInputStream.getCount();
countingInputStream.close();
final Block block = Block.buildNewFileHeaderBlock(toByteArray(newHeader.getSamFileHeader()));
final Block block = Block.createRawFileHeaderBlock(toByteArray(newHeader.getSamFileHeader()));
final ExposedByteArrayOutputStream byteArrayOutputStream = new ExposedByteArrayOutputStream();
block.write(newHeader.getVersion().major, byteArrayOutputStream);
if (byteArrayOutputStream.size() > c.containerByteSize) {
......
package htsjdk.samtools.cram.io;
package htsjdk.samtools.cram.compression;
import htsjdk.samtools.cram.encoding.rans.RANS;
import htsjdk.samtools.cram.compression.rans.RANS;
import htsjdk.samtools.cram.io.InputStreamUtils;
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
import htsjdk.samtools.util.IOUtil;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
......@@ -149,4 +151,25 @@ public class ExternalCompression {
buffer.get(bytes);
return bytes;
}
public static byte[] uncompress(final BlockCompressionMethod method, final byte[] compressedContent) {
try {
switch (method) {
case RAW:
return compressedContent;
case GZIP:
return gunzip(compressedContent);
case BZIP2:
return unbzip2(compressedContent);
case LZMA:
return unxz(compressedContent);
case RANS:
return unrans(compressedContent);
default:
throw new RuntimeException("Unknown block compression method: " + method.name());
}
} catch (final IOException e) {
throw new RuntimeException(e);
}
}
}
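A tiny sketch of the new dispatch method added above; only the RAW case is exercised, which returns the content unchanged, while the other methods route to the matching decompressor.

import htsjdk.samtools.cram.compression.ExternalCompression;
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;

import java.nio.charset.StandardCharsets;

public class UncompressExample {
    public static void main(final String[] args) {
        final byte[] raw = "ACGT".getBytes(StandardCharsets.UTF_8);

        // RAW content is passed through untouched; GZIP, BZIP2, LZMA and RANS
        // would be routed to gunzip, unbzip2, unxz and unrans respectively.
        final byte[] result = ExternalCompression.uncompress(BlockCompressionMethod.RAW, raw);
        System.out.println(new String(result, StandardCharsets.UTF_8));
    }
}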
package htsjdk.samtools.cram.encoding;
package htsjdk.samtools.cram.compression;
import htsjdk.samtools.cram.encoding.rans.RANS.ORDER;
import htsjdk.samtools.cram.io.ExternalCompression;
import htsjdk.samtools.cram.structure.BlockCompressionMethod;
import htsjdk.samtools.cram.structure.block.BlockCompressionMethod;
import htsjdk.samtools.cram.compression.rans.RANS.ORDER;
import java.io.IOException;
......
package htsjdk.samtools.cram.encoding.rans;
package htsjdk.samtools.cram.compression.rans;
class Constants {
static final int TF_SHIFT = 12;
......