Skip to content
Commits on Source (6)
FROM broadinstitute/java-baseimage
FROM openjdk:8
MAINTAINER Broad Institute DSDE <dsde-engineering@broadinstitute.org>
ARG build_command=shadowJar
......
......@@ -64,7 +64,7 @@ def ensureBuildPrerequisites(requiredJavaVersion, buildPrerequisitesMessage) {
}
ensureBuildPrerequisites(requiredJavaVersion, buildPrerequisitesMessage)
final htsjdkVersion = System.getProperty('htsjdk.version', '2.16.1')
final htsjdkVersion = System.getProperty('htsjdk.version', '2.18.2')
// We use a custom shaded build of the NIO library to avoid a regression in the authentication layer.
// GATK does the same, see https://github.com/broadinstitute/gatk/issues/3591
......
picard-tools (2.18.25+dfsg-1) unstable; urgency=medium
* New upstream release
-- Olivier Sallou <osallou@debian.org> Fri, 25 Jan 2019 10:46:36 +0000
picard-tools (2.18.23+dfsg-2) unstable; urgency=medium
* Fix Class-Path in manifest file for dependencies (Closes: #919559).
......
......@@ -19,7 +19,7 @@ Build-Depends: default-jdk (>= 2:1.9~),
libgkl-java,
libgatk-native-bindings-java,
# htsjdk and picard-tools are released nearly together
libhtsjdk-java (>= 2.16.1~),
libhtsjdk-java (>= 2.18.2~),
# required for tests:
testng (>= 6.9.10),
r-base-core,
......
......@@ -2,7 +2,7 @@ Description: do not use shadowjar
Author: Sascha Steinbiss <satta@debian.org>
--- a/build.gradle
+++ b/build.gradle
@@ -126,7 +126,7 @@ group = 'com.github.broadinstitute'
@@ -126,7 +126,7 @@
defaultTasks 'all'
......@@ -11,7 +11,7 @@ Author: Sascha Steinbiss <satta@debian.org>
// Source file names for the picard command line properties file. We select and include only one of
// these two files in each jar, renamed to "picardCmdLine.properties", depending on which parser we
@@ -214,6 +214,7 @@ task picardDoc(type: Javadoc, dependsOn:
@@ -213,6 +213,7 @@
options.addStringOption("verbose")
}
......@@ -19,7 +19,7 @@ Author: Sascha Steinbiss <satta@debian.org>
task currentJar(type: Copy){
from shadowJar
into new File(buildDir, "libs")
@@ -231,8 +232,7 @@ shadowJar {
@@ -230,8 +231,7 @@
}
}
}
......
......@@ -43,7 +43,7 @@ Forwarded: no
* 4 cases tested:
--- a/src/test/java/picard/util/LiftoverVcfTest.java
+++ b/src/test/java/picard/util/LiftoverVcfTest.java
@@ -1196,30 +1196,4 @@
@@ -1187,30 +1187,4 @@
Assert.assertEquals(runPicardCommandLine(args), 1);
}
......
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;
import htsjdk.samtools.*;
import htsjdk.samtools.util.*;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
@CommandLineProgramProperties(
        summary = AddOATag.USAGE_DETAILS,
        oneLineSummary = AddOATag.USAGE_SUMMARY,
        programGroup = ReadDataManipulationProgramGroup.class)
@DocumentedFeature
public class AddOATag extends CommandLineProgram {

    static final String USAGE_SUMMARY = "Record current alignment information to OA tag.";
    static final String USAGE_DETAILS = "This tool takes in an aligned SAM or BAM and adds the " +
            "OA tag to every aligned read unless an interval list is specified, where it only adds the tag to reads " +
            "that fall within the intervals in the interval list. This can be useful if you are about to realign but want " +
            "to keep the original alignment information as a separate tag." +
            "<br />"+
            "<h4>Usage example:</h4>" +
            "<pre>" +
            "java -jar picard.jar AddOATag \\<br />" +
            "      L=some_picard.interval_list \\<br />" +
            "      I=sorted.bam \\<br />" +
            "      O=fixed.bam <br />"+
            "</pre>";

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "SAM or BAM input file")
    public String INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "SAM or BAM file to write merged result to")
    public String OUTPUT;

    @Argument(shortName = "L", doc = "If provided, only records that overlap given interval list will have the OA tag added.", optional = true)
    public File INTERVAL_LIST;

    /**
     * Original Alignment tag key.
     */
    public static final String OA = "OA";

    private static final Log log = Log.getInstance(AddOATag.class);

    /**
     * Streams the input SAM/BAM, sets the OA tag on each record that overlaps the optional
     * interval list (or on every record when no interval list was supplied), and writes
     * every record — tagged or not — to OUTPUT.
     *
     * @return 0 on success, 1 if an I/O error occurs while opening or copying the files.
     */
    @Override
    protected int doWork() {
        try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(IOUtil.getPath(INPUT));
             final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, IOUtil.getPath(OUTPUT), REFERENCE_SEQUENCE)) {
            writer.setProgressLogger(
                    new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

            // Typed detector (was a raw OverlapDetector, which produced an unchecked warning);
            // null when no interval list was supplied, in which case every record is tagged.
            final OverlapDetector<Interval> overlapDetector = getOverlapDetectorFromIntervalListFile(INTERVAL_LIST, 0, 0);
            for (final SAMRecord rec : reader) {
                if (overlapDetector == null || overlapDetector.overlapsAny(rec)) {
                    setOATag(rec);
                }
                writer.addAlignment(rec);
            }
        } catch (IOException e) {
            log.error(e);
            return 1;
        }

        return 0;
    }

    /**
     * Takes an interval list file and converts it to an overlap detector; can add left and
     * right padding via the buffer arguments.
     *
     * @param intervalList interval list file, may be null (in which case null is returned)
     * @param lhsBuffer    left padding applied by the detector
     * @param rhsBuffer    right padding applied by the detector
     * @return an overlap detector over the uniqued intervals, or null when intervalList is null
     */
    static OverlapDetector<Interval> getOverlapDetectorFromIntervalListFile(File intervalList, int lhsBuffer, int rhsBuffer) {
        if (intervalList == null) {
            return null;
        }
        List<Interval> intervals = IntervalList.fromFile(intervalList).uniqued().getIntervals();
        OverlapDetector<Interval> detector = new OverlapDetector<>(lhsBuffer, rhsBuffer);
        detector.addAll(intervals, intervals);
        return detector;
    }

    // Format the OA tag string according to the spec and append it to any existing OA value.
    // A comma in the reference name would corrupt the comma-delimited OA fields, so it is rejected.
    //TODO: Move this to htsjdk once https://github.com/samtools/hts-specs/pull/193 is merged
    private void setOATag(SAMRecord rec) {
        if (rec.getReferenceName().contains(",")) {
            throw new PicardException(String.format("Reference name for record %s contains a comma character.", rec.getReadName()));
        }
        final String oaValue;
        if (rec.getReadUnmappedFlag()) {
            // Unmapped reads get placeholder fields: "*" ref, pos 0, "*" CIGAR, MAPQ 255.
            oaValue = String.format("*,0,%s,*,255,;", rec.getReadNegativeStrandFlag() ? "-" : "+");
        } else {
            oaValue = String.format("%s,%s,%s,%s,%s,%s;",
                    rec.getReferenceName(),
                    rec.getAlignmentStart(),
                    rec.getReadNegativeStrandFlag() ? "-" : "+",
                    rec.getCigarString(),
                    rec.getMappingQuality(),
                    Optional.ofNullable(rec.getAttribute(SAMTag.NM.name())).orElse(""));
        }
        // OA is cumulative: append to any value already present rather than overwriting.
        rec.setAttribute(OA, Optional.ofNullable(rec.getAttribute(OA)).orElse("") + oaValue);
    }
}
......@@ -756,23 +756,14 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
if (firstOfNextChunk != null && areComparableForDuplicates(firstOfNextChunk, next, true, useBarcodes)) {
nextChunk.add(next);
} else {
if (nextChunk.size() > 1) {
markDuplicatePairs(nextChunk);
if (TAG_DUPLICATE_SET_MEMBERS) {
addRepresentativeReadIndex(nextChunk);
}
}
handleChunk(nextChunk);
nextChunk.clear();
nextChunk.add(next);
firstOfNextChunk = next;
}
}
if (nextChunk.size() > 1) {
markDuplicatePairs(nextChunk);
if (TAG_DUPLICATE_SET_MEMBERS) {
addRepresentativeReadIndex(nextChunk);
}
}
handleChunk(nextChunk);
this.pairSort.cleanup();
this.pairSort = null;
......@@ -813,6 +804,17 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
}
}
/**
 * Processes one accumulated chunk of comparable read ends: multi-element chunks get
 * duplicate-marked (and, when requested, tagged with their representative read index),
 * while a lone element is counted as a duplicate set of size one.
 */
private void handleChunk(List<ReadEndsForMarkDuplicates> nextChunk) {
    final int chunkSize = nextChunk.size();
    if (chunkSize == 1) {
        // Singleton: nothing to mark, but it still contributes to the set-size histograms.
        addSingletonToCount(libraryIdGenerator);
    } else if (chunkSize > 1) {
        markDuplicatePairs(nextChunk);
        if (TAG_DUPLICATE_SET_MEMBERS) {
            addRepresentativeReadIndex(nextChunk);
        }
    }
}
private boolean areComparableForDuplicates(final ReadEndsForMarkDuplicates lhs, final ReadEndsForMarkDuplicates rhs, final boolean compareRead2, final boolean useBarcodes) {
boolean areComparable = lhs.libraryId == rhs.libraryId;
......
......@@ -188,6 +188,11 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
file.setHistogram(metricsByLibrary.values().iterator().next().calculateRoiHistogram());
}
// Add set size histograms - the set size counts are printed on adjacent columns to the ROI metric.
file.addHistogram(libraryIdGenerator.getDuplicateCountHist());
file.addHistogram(libraryIdGenerator.getOpticalDuplicateCountHist());
file.addHistogram(libraryIdGenerator.getNonOpticalDuplicateCountHist());
file.write(METRICS_FILE);
}
......@@ -260,6 +265,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
}
// Check if we need to partition since the orientations could have changed
final int nOpticalDup;
if (hasFR && hasRF) { // need to track them independently
// Variables used for optical duplicate detection and tracking
final List<ReadEnds> trackOpticalDuplicatesF = new ArrayList<>();
......@@ -277,11 +283,26 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
}
// track the duplicates
trackOpticalDuplicates(trackOpticalDuplicatesF, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
trackOpticalDuplicates(trackOpticalDuplicatesR, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
final int nOpticalDupF = trackOpticalDuplicates(trackOpticalDuplicatesF,
keeper,
opticalDuplicateFinder,
libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
final int nOpticalDupR = trackOpticalDuplicates(trackOpticalDuplicatesR,
keeper,
opticalDuplicateFinder,
libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
nOpticalDup = nOpticalDupF + nOpticalDupR;
} else { // No need to partition
AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(ends, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
nOpticalDup = trackOpticalDuplicates(ends, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
}
trackDuplicateCounts(ends.size(),
nOpticalDup,
libraryIdGenerator);
}
/**
 * Records a duplicate set of size one: the read is counted in the overall set-size
 * histogram and in the non-optical histogram (a singleton cannot be an optical duplicate).
 */
public static void addSingletonToCount(final LibraryIdGenerator libraryIdGenerator) {
    final double singletonSetSize = 1.0;
    libraryIdGenerator.getDuplicateCountHist().increment(singletonSetSize);
    libraryIdGenerator.getNonOpticalDuplicateCountHist().increment(singletonSetSize);
}
/**
......@@ -294,7 +315,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
* optical duplicate detection, we do not consider them duplicates if one read as FR and the other RF when we order orientation by the
* first mate sequenced (read #1 of the pair).
*/
private static void trackOpticalDuplicates(final List<? extends ReadEnds> list,
private static int trackOpticalDuplicates(final List<? extends ReadEnds> list,
final ReadEnds keeper,
final OpticalDuplicateFinder opticalDuplicateFinder,
final Histogram<Short> opticalDuplicatesByLibraryId) {
......@@ -311,5 +332,23 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
if (opticalDuplicates > 0) {
opticalDuplicatesByLibraryId.increment(list.get(0).getLibraryId(), opticalDuplicates);
}
return opticalDuplicates;
}
/**
 * Folds one completed duplicate set into the library's set-size histograms.
 *
 * @param listSize           total number of read ends in the duplicate set
 * @param optDupCnt          how many of those were flagged as optical duplicates
 * @param libraryIdGenerator holder of the per-library histograms being updated
 */
private static void trackDuplicateCounts(final int listSize,
                                         final int optDupCnt,
                                         final LibraryIdGenerator libraryIdGenerator) {
    final Histogram<Double> duplicatesCountHist = libraryIdGenerator.getDuplicateCountHist();
    final Histogram<Double> nonOpticalDuplicatesCountHist = libraryIdGenerator.getNonOpticalDuplicateCountHist();
    final Histogram<Double> opticalDuplicatesCountHist = libraryIdGenerator.getOpticalDuplicateCountHist();
    // Every set contributes its full size to the overall histogram.
    duplicatesCountHist.increment((double) listSize);
    // Non-optical bucket only gets an entry when something remains after removing optical dups.
    if ((listSize - optDupCnt) > 0) {
        nonOpticalDuplicatesCountHist.increment((double) (listSize - optDupCnt));
    }
    if (optDupCnt > 0) {
        // NOTE(review): the optical set size is recorded as optDupCnt + 1 — presumably so the
        // kept (representative) read is included in the set size; confirm against the metrics doc.
        opticalDuplicatesCountHist.increment((double) (optDupCnt + 1.0));
    }
}
}
......@@ -47,9 +47,11 @@ public class LibraryIdGenerator {
private final SAMFileHeader header;
private final Map<String, Short> libraryIds = new HashMap<String, Short>(); // from library string to library id
private short nextLibraryId = 1;
private final Map<String, DuplicationMetrics> metricsByLibrary = new HashMap<String, DuplicationMetrics>();
private final Histogram<Short> opticalDuplicatesByLibraryId = new Histogram<Short>();
private final Map<String, DuplicationMetrics> metricsByLibrary = new HashMap<>();
private final Histogram<Short> opticalDuplicatesByLibraryId = new Histogram<>();
private final Histogram<Double> duplicateCountHist = new Histogram<>("set_size", "all_sets");
private final Histogram<Double> nonOpticalDuplicateCountHist = new Histogram<>("set_size", "non_optical_sets");
private final Histogram<Double> opticalDuplicateCountHist = new Histogram<>("set_size", "optical_sets");
public LibraryIdGenerator(final SAMFileHeader header) {
this.header = header;
......@@ -65,13 +67,31 @@ public class LibraryIdGenerator {
}
}
public Map<String, Short> getLibraryIdsMap() { return this.libraryIds; }
public Map<String, Short> getLibraryIdsMap() {
return this.libraryIds;
}
public Map<String, DuplicationMetrics> getMetricsByLibraryMap() {
return this.metricsByLibrary;
}
public Histogram<Short> getOpticalDuplicatesByLibraryIdMap() {
return this.opticalDuplicatesByLibraryId;
}
public Histogram<Double> getDuplicateCountHist() {
return this.duplicateCountHist;
}
public Map<String, DuplicationMetrics> getMetricsByLibraryMap() { return this.metricsByLibrary; }
public Histogram<Double> getNonOpticalDuplicateCountHist() {
return this.nonOpticalDuplicateCountHist;
}
public Histogram<Short> getOpticalDuplicatesByLibraryIdMap() { return this.opticalDuplicatesByLibraryId; }
public Histogram<Double> getOpticalDuplicateCountHist() {
return this.opticalDuplicateCountHist;
}
public static String getReadGroupLibraryName(SAMReadGroupRecord readGroup) {
public static String getReadGroupLibraryName(final SAMReadGroupRecord readGroup) {
return Optional.ofNullable(readGroup.getLibrary())
.orElse(UNKNOWN_LIBRARY);
}
......
......@@ -28,9 +28,10 @@ import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import picard.sam.util.PhysicalLocation;
import picard.sam.util.ReadNameParser;
import picard.util.GraphUtils;
import java.io.Serializable;
import java.util.List;
import java.util.*;
/**
* Contains methods for finding optical/co-localized/sequencing duplicates.
......@@ -83,7 +84,6 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
}
/**
*
* @param readNameRegex see {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
* @param opticalDuplicatePixelDistance the optical duplicate pixel distance
* @param log the log to which to write messages.
......@@ -94,7 +94,6 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
}
/**
*
* @param readNameRegex see {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
* @param opticalDuplicatePixelDistance the optical duplicate pixel distance
* @param maxDuplicateSetSize the size of a set that is too big to process
......@@ -130,7 +129,6 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
return opticalDuplicateFlags;
}
final int distance = this.opticalDuplicatePixelDistance;
final PhysicalLocation actualKeeper = keeperOrNull(list, keeper);
final Log log;
......@@ -149,13 +147,26 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
progressLoggerForKeeper = null;
progressLoggerForRest = null;
}
if (length >= (keeper == null ? 3 : 4)) {
return getOpticalDuplicatesFlagWithGraph(list, actualKeeper, opticalDuplicateFlags, log, progressLoggerForKeeper, progressLoggerForRest, logProgress);
} else {
return getOpticalDuplicatesFlagFast(list, actualKeeper, opticalDuplicateFlags, log, progressLoggerForKeeper, progressLoggerForRest, logProgress);
}
}
/**
* Compute optical duplicates quickly in the standard case where we know that there won't be any transitive distances to worry about.
*
* Note, this is guaranteed to be correct when there are at most 2x reads from a readgroup or 3x with the keeper present
*/
private boolean[] getOpticalDuplicatesFlagFast(List<? extends PhysicalLocation> list, PhysicalLocation actualKeeper, boolean[] opticalDuplicateFlags, Log log, ProgressLogger progressLoggerForKeeper, ProgressLogger progressLoggerForRest, boolean logProgress) {
final int length = list.size();
// First go through and compare all the reads to the keeper
if (actualKeeper != null) {
for (int i = 0; i < length; ++i) {
final PhysicalLocation other = list.get(i);
opticalDuplicateFlags[i] = closeEnough(actualKeeper, other, distance);
opticalDuplicateFlags[i] = closeEnough(actualKeeper, other, this.opticalDuplicatePixelDistance);
// The main point of adding this log and if statement (also below) is a workaround a bug in the JVM
// which causes a deep exception (https://github.com/broadinstitute/picard/issues/472).
// It seems that this is related to https://bugs.openjdk.java.net/browse/JDK-8033717 which
......@@ -163,25 +174,21 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
// every time we tried to duplicate-mark it. The problem seemed to be a duplicate-set of size 500,000,
// and this loop seemed to kill the JVM for some reason. This logging statement (and the one in the
// loop below) solved the problem.
if (logProgress) progressLoggerForKeeper.record(String.format("%d", other.getReadGroup()), other.getX());
}
}
if (logProgress) log.debug("Done with comparing to keeper, now the rest.");
// Now go through and do each pairwise comparison not involving the actualKeeper
for (int i = 0; i < length; ++i) {
final PhysicalLocation lhs = list.get(i);
if (lhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
// logging here for same reason as above
if (logProgress) progressLoggerForRest.record(String.format("%d", lhs.getReadGroup()), lhs.getX());
for (int j = i + 1; j < length; ++j) {
final PhysicalLocation rhs = list.get(j);
if (rhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j]) continue; // both already marked, no need to check
if (closeEnough(lhs, rhs, distance)) {
if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j])
continue; // both already marked, no need to check
if (closeEnough(lhs, rhs, this.opticalDuplicatePixelDistance)) {
// At this point we want to mark either lhs or rhs as duplicate. Either could have been marked
// as a duplicate of the keeper (but not both - that's checked above), so be careful about which
// one to now mark as a duplicate.
......@@ -190,10 +197,123 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
}
}
}
return opticalDuplicateFlags;
}
/**
* Compute the optical duplicates correctly in the case where the duplicate group could end up with transitive optical duplicates
*/
/**
 * Computes optical-duplicate flags for a duplicate set by building a proximity graph
 * (edges join reads within the optical pixel distance, per read group + tile) and
 * clustering it; within each cluster one representative is kept un-flagged — the keeper
 * when present, otherwise the read with the minimal (x, y) coordinate — and every other
 * cluster member is flagged in {@code opticalDuplicateFlags}.
 *
 * @return the (mutated) opticalDuplicateFlags array
 */
private boolean[] getOpticalDuplicatesFlagWithGraph(List<? extends PhysicalLocation> list, PhysicalLocation keeper, boolean[] opticalDuplicateFlags, Log log, ProgressLogger progressLoggerForKeeper, ProgressLogger progressLoggerForRest, boolean logProgress) {
    // Make a graph where the edges are reads that lie within the optical duplicate pixel
    // distance from each other; we then cluster the graph (union-find) to find optical
    // duplicate groups.
    final GraphUtils.Graph<Integer> opticalDistanceRelationGraph = new GraphUtils.Graph<>();
    if (logProgress) {
        log.debug("Building adjacency graph for duplicate group");
    }
    // Bucket read indices by (read group, tile): only reads sharing both can be optical
    // duplicates of one another, so edges are only drawn within a bucket.
    final Map<Integer, List<Integer>> tileRGmap = new HashMap<>();
    int keeperIndex = -1;
    for (int i = 0; i < list.size(); i++) {
        PhysicalLocation currentLoc = list.get(i);
        if (currentLoc == keeper) {
            keeperIndex = i;
        }
        if (currentLoc.hasLocation()) {
            // Composite key: read group in the high 16 bits, tile in the low bits.
            final int key = ((int) currentLoc.getReadGroup() << 16) + currentLoc.getTile();
            if (tileRGmap.containsKey(key)) {
                tileRGmap.get(key).add(i);
            } else {
                final List<Integer> pLocation = new ArrayList<>();
                pLocation.add(i);
                tileRGmap.put(key, pLocation);
            }
        }
        // Every read becomes a node, even those without location info (they stay isolated).
        opticalDistanceRelationGraph.addNode(i);
    }
    // Finding adjacent optical duplicates is O(n^2), so subdividing the input by
    // (read group, tile) avoids redundant cross-group comparisons.
    for (List<Integer> tileGroup : tileRGmap.values()) {
        if (tileGroup.size() > 1) {
            fillGraphFromAGroup(list, tileGroup, logProgress, progressLoggerForKeeper, this.opticalDuplicatePixelDistance, opticalDistanceRelationGraph);
        }
    }
    if (logProgress) {
        log.debug("Finished building adjacency graph for duplicate group, moving onto clustering");
    }
    // Keep a map of the reads and their cluster assignments
    final Map<Integer, Integer> opticalDuplicateClusterMap = opticalDistanceRelationGraph.cluster();
    final Map<Integer, Integer> clusterToRepresentativeRead = new HashMap<>();
    Integer keeperCluster = null;
    // Pre-seed the keeper as its cluster's representative so it is never flagged.
    if (keeperIndex >= 0) {
        clusterToRepresentativeRead.put(opticalDuplicateClusterMap.get(keeperIndex), keeperIndex);
        keeperCluster = opticalDuplicateClusterMap.get(keeperIndex);
    }
    for (final Map.Entry<Integer, Integer> entry : opticalDuplicateClusterMap.entrySet()) {
        // logging here for same reason as above (JVM workaround in fillGraphFromAGroup)
        final int recordIndex = entry.getKey();
        final int recordAssignedCluster = entry.getValue();
        if (logProgress) {
            progressLoggerForRest.record(String.format("%d", list.get(recordIndex).getReadGroup()), list.get(recordIndex).getX());
        }
        // If it's not the first read we've seen for this cluster, mark it as an optical duplicate
        if (clusterToRepresentativeRead.containsKey(recordAssignedCluster) && recordIndex != keeperIndex) {
            final PhysicalLocation representativeLoc = list.get(clusterToRepresentativeRead.get(recordAssignedCluster));
            final PhysicalLocation currentRecordLoc = list.get(recordIndex);
            // If not in the keeper cluster, keep the minX -> minY valued duplicate as the
            // representative (note the tile must be equal for reads to cluster together).
            if (!(keeperIndex >= 0 && recordAssignedCluster == keeperCluster) && // checking we don't accidentally set the keeper as an optical duplicate
                    (currentRecordLoc.getX() < representativeLoc.getX() || (currentRecordLoc.getX() == representativeLoc.getX() && currentRecordLoc.getY() < representativeLoc.getY()))) {
                // Mark the old min as an optical duplicate, and save the new min
                opticalDuplicateFlags[clusterToRepresentativeRead.get(recordAssignedCluster)] = true;
                clusterToRepresentativeRead.put(recordAssignedCluster, recordIndex);
            } else {
                // A smaller (or keeper) read already represents this cluster: flag this one.
                opticalDuplicateFlags[recordIndex] = true;
            }
        } else {
            // First read seen for this cluster becomes its provisional representative.
            clusterToRepresentativeRead.put(recordAssignedCluster, recordIndex);
        }
    }
    return opticalDuplicateFlags;
}
/**
 * Adds an edge to {@code opticalDistanceRelationGraph} for every pair of reads in
 * {@code groupList} (indices into {@code wholeList}) whose physical locations lie within
 * {@code distance} pixels of each other.
 */
private void fillGraphFromAGroup(final List<? extends PhysicalLocation> wholeList, final List<Integer> groupList, final boolean logProgress, final ProgressLogger progressLoggerForKeeper, final int distance, final GraphUtils.Graph<Integer> opticalDistanceRelationGraph) {
    final int groupSize = groupList.size();
    for (int outer = 0; outer < groupSize; outer++) {
        final int lhsIndex = groupList.get(outer);
        final PhysicalLocation lhs = wholeList.get(lhsIndex);

        // This progress record works around a JVM problem seen on huge duplicate sets
        // (https://github.com/broadinstitute/picard/issues/472, possibly related to
        // https://bugs.openjdk.java.net/browse/JDK-8033717): a ~500,000-read duplicate set
        // reproducibly killed the JVM until these logging statements were added.
        if (logProgress) {
            progressLoggerForKeeper.record(String.format("%d", lhs.getReadGroup()), lhs.getX());
        }

        for (int inner = outer + 1; inner < groupSize; inner++) {
            final int rhsIndex = groupList.get(inner);
            final PhysicalLocation rhs = wholeList.get(rhsIndex);
            if (closeEnoughShort(lhs, rhs, distance)) {
                opticalDistanceRelationGraph.addEdge(lhsIndex, rhsIndex);
            }
        }
    }
}
/** Returns the keeper if it is contained within the list and has location information, otherwise null. */
private PhysicalLocation keeperOrNull(final List<? extends PhysicalLocation> list, final PhysicalLocation keeper) {
if (keeper != null && keeper.hasLocation()) {
......@@ -213,4 +333,10 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
Math.abs(lhs.getX() - rhs.getX()) <= distance &&
Math.abs(lhs.getY() - rhs.getY()) <= distance;
}
/**
 * Returns true when {@code lhs} and {@code rhs} are distinct objects whose X and Y pixel
 * coordinates each differ by at most {@code distance}.
 */
private boolean closeEnoughShort(final PhysicalLocation lhs, final PhysicalLocation rhs, final int distance) {
    if (lhs == rhs) {
        return false; // a read is never an optical duplicate of itself
    }
    return Math.abs(lhs.getX() - rhs.getX()) <= distance
            && Math.abs(lhs.getY() - rhs.getY()) <= distance;
}
}
......@@ -8,6 +8,6 @@ import org.broadinstitute.barclay.argparser.Argument;
public class PGTagArgumentCollection {
@Argument(doc = "Add PG tag to each read in a SAM or BAM", common = true)
public Boolean ADD_PG_TAG_TO_READS = true;
public boolean ADD_PG_TAG_TO_READS = true;
}
\ No newline at end of file
......@@ -34,10 +34,14 @@ import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.lang3.ArrayUtils;
import picard.vcf.LiftoverVcf;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class LiftoverUtils {
/**
......@@ -52,13 +56,14 @@ public class LiftoverUtils {
public static final Collection<String> DEFAULT_TAGS_TO_REVERSE = Arrays.asList("AF");
public static final Collection<String> DEFAULT_TAGS_TO_DROP = Arrays.asList("MAX_AF");
public static final Log log = Log.getInstance(LiftoverUtils.class);
/**
* This will take an input VariantContext and lift to the provided interval.
* If this interval is in the opposite orientation, all alleles and genotypes will be reverse complemented
* and indels will be left-aligned. Currently this is only able to invert biallelic indels, and null will be
* returned for any unsupported VC.
*
* @param source The VariantContext to lift
* @param target The target interval
* @param refSeq The reference sequence, which should match the target interval
......@@ -112,6 +117,7 @@ public class LiftoverUtils {
* This is a utility method that will convert a list of alleles into a list of base strings. Reference status
* is ignored when creating these strings (i.e. 'A', not 'A*'). These strings should be sufficient
* to recreate an Allele using Allele.create()
*
* @param alleles The list of alleles
* @return A list of strings representing the bases of the input alleles.
*/
......@@ -140,18 +146,14 @@ public class LiftoverUtils {
throw new IllegalArgumentException("This should only be called for negative strand liftovers");
}
// not currently supported
if (source.isIndel() && !source.isBiallelic()) {
return null;
}
final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
final VariantContextBuilder vcb = new VariantContextBuilder(source);
vcb.chr(target.getContig());
// By convention, indels are left aligned and include the base prior to that indel.
// When reverse complementing, will be necessary to include this additional base.
// This check prevents the presumably rare situation in which the indel occurs on the first position of the sequence
// This check prevents the extremely rare situation in which the indel occurs on the
// first base of the sequence
final boolean addToStart = source.isIndel() && target.getStart() > 1;
final int start = target.getStart() - (addToStart ? 1 : 0);
......@@ -162,7 +164,7 @@ public class LiftoverUtils {
vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, source.isIndel(), addToStart));
if (source.isIndel()) {
if (!source.isSNP()) {
// check that the reverse complemented bases match the new reference
if (!referenceAlleleMatchesReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
return null;
......@@ -175,22 +177,21 @@ public class LiftoverUtils {
return vcb;
}
private static List<Allele> reverseComplementAlleles(final List<Allele> originalAlleles, final Interval target, final ReferenceSequence refSeq, final boolean isBiAllelicIndel, final boolean addToStart){
private static List<Allele> reverseComplementAlleles(final List<Allele> originalAlleles, final Interval target, final ReferenceSequence refSeq, final boolean isIndel, final boolean addToStart) {
final List<Allele> alleles = new ArrayList<>();
for (final Allele oldAllele : originalAlleles) {
alleles.add(LiftoverUtils.reverseComplement(oldAllele, target, refSeq, isBiAllelicIndel, addToStart));
alleles.add(LiftoverUtils.reverseComplement(oldAllele, target, refSeq, isIndel, addToStart));
}
return alleles;
}
private static Allele reverseComplement(final Allele oldAllele, final Interval target, final ReferenceSequence referenceSequence, final boolean isBiAllelicIndel, final boolean addToStart){
private static Allele reverseComplement(final Allele oldAllele, final Interval target, final ReferenceSequence referenceSequence, final boolean isIndel, final boolean addToStart) {
if (oldAllele.isSymbolic() || oldAllele.isNoCall()) {
return oldAllele;
}
else if (isBiAllelicIndel) {
} else if (isIndel) {
// target.getStart is 1-based, reference bases are 0-based
final StringBuilder alleleBuilder = new StringBuilder(target.getEnd() - target.getStart() + 1);
......@@ -344,7 +345,6 @@ public class LiftoverUtils {
*
* Based on Adrian Tan, Gon&ccedil;alo R. Abecasis and Hyun Min Kang. (2015)
* Unified Representation of Genetic Variants. Bioinformatics.
*
*/
protected static void leftAlignVariant(final VariantContextBuilder builder, final int start, final int end, final List<Allele> alleles, final ReferenceSequence referenceSequence) {
......@@ -422,7 +422,9 @@ public class LiftoverUtils {
.collect(Collectors.toMap(Map.Entry::getKey, me -> Allele.create(me.getValue(), me.getKey().isReference())));
//retain original order:
List<Allele> fixedAlleles = alleles.stream().map(fixedAlleleMap::get).collect(Collectors.toList());
List<Allele> fixedAlleles = alleles.stream()
.map(fixedAlleleMap::get)
.collect(Collectors.toList());
builder.alleles(fixedAlleles);
}
......
......@@ -31,7 +31,9 @@ import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.*;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
......@@ -49,6 +51,7 @@ import java.io.File;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
import java.util.stream.Collectors;
/**
* <h3>Summary</h3>
......@@ -184,7 +187,7 @@ public class LiftoverVcf extends CommandLineProgram {
/**
* Filter name to use when a target cannot be lifted over.
*/
public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel";
public static final String FILTER_CANNOT_LIFTOVER_REV_COMP = "CannotLiftOver";
/**
* Filter name to use when a target cannot be lifted over.
......@@ -206,7 +209,7 @@ public class LiftoverVcf extends CommandLineProgram {
* Filters to be added to the REJECT file.
*/
private static final List<VCFFilterHeaderLine> FILTERS = CollectionUtil.makeList(
new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_INDEL, "Indel falls into a reverse complemented region in the target genome."),
new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_REV_COMP, "Liftover of a variant that needed reverse-complementing failed for unknown reasons."),
new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."),
new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover."),
new VCFFilterHeaderLine(FILTER_INDEL_STRADDLES_TWO_INTERVALS, "Reference allele in Indel is straddling multiple intervals in the chain, and so the results are not well defined.")
......@@ -232,6 +235,11 @@ public class LiftoverVcf extends CommandLineProgram {
*/
public static final String ATTEMPTED_LOCUS = "AttemptedLocus";
/**
* Attribute used to store the position of the failed variant on the target contig prior to finding out that alleles do not match.
*/
public static final String ATTEMPTED_ALLELES = "AttemptedAlleles";
/**
* Metadata to be added to the Passing file.
*/
......@@ -324,9 +332,12 @@ public class LiftoverVcf extends CommandLineProgram {
.modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
.build();
final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
for (final VCFFilterHeaderLine line : FILTERS) {
rejectHeader.addMetaDataLine(line);
}
rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS,1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to mismatching alleles."));
rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS, 1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to reference allele mismatching to the target reference."));
rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_ALLELES, 1, VCFHeaderLineType.String, "The alleles of the variant in the TARGET prior to failing due to reference allele mismatching to the target reference."));
rejects.writeHeader(rejectHeader);
......@@ -369,11 +380,7 @@ public class LiftoverVcf extends CommandLineProgram {
final ReferenceSequence refSeq;
// if the target is null OR (the target is reverse complemented AND the variant is a non-biallelic indel or mixed), then we cannot lift it over
if (target.isNegativeStrand() && (ctx.isMixed() || ctx.isIndel() && !ctx.isBiallelic())) {
rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_INDEL);
} else if (!refSeqs.containsKey(target.getContig())) {
if (!refSeqs.containsKey(target.getContig())) {
rejectVariant(ctx, FILTER_NO_TARGET);
final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
......@@ -389,7 +396,7 @@ public class LiftoverVcf extends CommandLineProgram {
final VariantContext liftedVC = LiftoverUtils.liftVariant(ctx, target, refSeq, WRITE_ORIGINAL_POSITION, WRITE_ORIGINAL_ALLELES);
// the liftedVC can be null if the liftover fails because of a problem with reverse complementing
if (liftedVC == null) {
rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_INDEL);
rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_REV_COMP);
} else {
tryToAddVariant(liftedVC, refSeq, ctx);
}
......@@ -479,7 +486,6 @@ public class LiftoverVcf extends CommandLineProgram {
* @param vc new {@link VariantContext}
* @param refSeq {@link ReferenceSequence} of new reference
* @param source the original {@link VariantContext} to use for putting the original location information into vc
* @return true if successful, false if failed due to mismatching reference allele.
*/
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
if (!refSeq.getName().equals(vc.getContig())) {
......@@ -514,6 +520,7 @@ public class LiftoverVcf extends CommandLineProgram {
rejects.add(new VariantContextBuilder(source)
.filter(FILTER_MISMATCHING_REF_ALLELE)
.attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
.attribute(ATTEMPTED_ALLELES, vc.getReference().toString() + "->" + String.join(",", vc.getAlternateAlleles().stream().map(Allele::toString).collect(Collectors.toList())))
.make());
failedAlleleCheck++;
trackLiftedVariantContig(rejectsByContig, source.getContig());
......
package picard.sam;
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Tests for the AddOATag command line program: verifies that the OA (original alignment)
 * tag is written as expected, both with and without an interval list restricting which
 * records are tagged.
 */
public class AddOATagTest {
    private static final Path TEST_DIR = Paths.get("testdata/picard/sam/");
    private static final Path OA_TEST_DIR = TEST_DIR.resolve("AddOATag/");

    @DataProvider(name="testOAData")
    Object[][] testOAData() {
        return new Object[][]{
                new Object[]{OA_TEST_DIR.resolve("aligned_withoutOA.sam").toFile(), OA_TEST_DIR.resolve("aligned_withOA.sam").toFile()},
        };
    }

    @DataProvider(name="testIntervalListData")
    Object[][] testIntervalListData() {
        return new Object[][]{
                // no interval list: every record gets tagged
                new Object[]{TEST_DIR.resolve("aligned.sam").toFile(), null, OA_TEST_DIR.resolve("aligned_no_intervals.sam").toFile()},
                // interval list with no overlapping records: output equals input
                new Object[]{TEST_DIR.resolve("aligned.sam").toFile(), new File("testdata/picard/analysis/directed/CollectHsMetrics/chrM.interval_list"), TEST_DIR.resolve("aligned.sam").toFile()},
                // interval list overlapping some records: only those get tagged
                new Object[]{TEST_DIR.resolve("aligned.sam").toFile(), TEST_DIR.resolve("contiguous.interval_list").toFile(), OA_TEST_DIR.resolve("aligned_with_intervals.sam").toFile()}
        };
    }

    @Test(dataProvider = "testOAData")
    public void testWritingOATag(final File testSam, final File truthSam) throws IOException {
        final File clpOutput = File.createTempFile("AddOATag", ".bam");
        clpOutput.deleteOnExit();

        runAddOATag(testSam, clpOutput, null);
        validateOATag(clpOutput, truthSam);
    }

    @Test(dataProvider = "testIntervalListData")
    public void testIntervalList(final File inputSam, final File intervalList, final File truthSam) throws IOException {
        final File clpOutput = File.createTempFile("AddOATag", ".bam");
        clpOutput.deleteOnExit();

        runAddOATag(inputSam, clpOutput, intervalList);
        validateOATag(clpOutput, truthSam);
    }

    /**
     * Runs the AddOATag CLP on inputSam, writing to output; intervalList may be null
     * to tag all records. Asserts the program exits successfully.
     */
    private static void runAddOATag(final File inputSam, final File output, final File intervalList) throws IOException {
        final List<String> args = new ArrayList<>(Arrays.asList(
                "INPUT=" + inputSam,
                "OUTPUT=" + output
        ));
        if (intervalList != null) {
            args.add("INTERVAL_LIST=" + intervalList);
        }
        final AddOATag addOATag = new AddOATag();
        Assert.assertEquals(addOATag.instanceMain(args.toArray(new String[0])), 0, "Running addOATag did not succeed");
    }

    /**
     * Asserts that the per-record OA tag values in testSam match those in truthSam,
     * in order.
     */
    private static void validateOATag(final File testSam, final File truthSam) throws IOException {
        Assert.assertEquals(readOATagValues(testSam), readOATagValues(truthSam));
    }

    /**
     * Collects the OA tag value (possibly null) of every record in the given SAM file.
     * The reader is closed via try-with-resources; the original code leaked both readers.
     */
    private static List<String> readOATagValues(final File sam) throws IOException {
        final List<String> oaValues = new ArrayList<>();
        try (final SamReader reader = SamReaderFactory.makeDefault().open(sam)) {
            for (final SAMRecord rec : reader) {
                oaValues.add(rec.getStringAttribute(AddOATag.OA));
            }
        }
        return oaValues;
    }
}
......@@ -98,6 +98,11 @@ public class CompareSAMsTest extends CommandLineProgramTest {
testHelper("genomic_sorted.sam", "genomic_sorted.sam", 2, 0, 0, 0, 0, 0, 0, true);
}
@Test
public void testIdenticalExceptHeaderVersion() {
testHelper("genomic_sorted.sam", "genomic_sorted_sam_v1.6.sam", 2, 0, 0, 0, 0, 0, 0, false);
}
@Test
public void testHasNonPrimary() {
testHelper("genomic_sorted.sam", "has_non_primary.sam", 2, 0, 0, 0, 0, 0, 0, true);
......
......@@ -21,8 +21,10 @@ public class ValidateSamFileTest extends CommandLineProgramTest {
return new Object[][] {
{"nofile", ValidateSamFile.ReturnTypes.FAILED.value()},
{"good/sorted-pair.sam", ValidateSamFile.ReturnTypes.SUCCESSFUL.value()},
{"good/sorted-pair-v1.6.sam", ValidateSamFile.ReturnTypes.SUCCESSFUL.value()},
{"bad/unpaired-mate.sam", ValidateSamFile.ReturnTypes.ERRORS.value()},
{"bad/missing-rg-info.sam", ValidateSamFile.ReturnTypes.ERRORS_WARNINGS.value()},
{"bad/missing-rg-info-v1.6.sam", ValidateSamFile.ReturnTypes.ERRORS_WARNINGS.value()},
{"bad/sorted-pair-missing-rg.sam", ValidateSamFile.ReturnTypes.WARNINGS.value()}
};
}
......
......@@ -32,7 +32,6 @@ import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
......@@ -43,6 +42,7 @@ import picard.sam.testers.SamFileTester;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
......@@ -143,44 +143,47 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
}
@Override
public void test() {
public void test() throws IOException {
testMetrics();
}
/**
* Runs test and returns metrics
* @return Duplication metrics
* @throws IOException
*/
public MetricsFile<DuplicationMetrics, Double> testMetrics() throws IOException {
try {
updateExpectedDuplicationMetrics();
// Read the output and check the duplicate flag
int outputRecords = 0;
final Set<String> sequencingDTErrorsSeen = new HashSet<>();
final SamReader reader = SamReaderFactory.makeDefault().open(getOutput());
try(final SamReader reader = SamReaderFactory.makeDefault().open(getOutput())) {
for (final SAMRecord record : reader) {
outputRecords++;
final String key = samRecordToDuplicatesFlagsKey(record);
if (!this.duplicateFlags.containsKey(key)) {
System.err.println("DOES NOT CONTAIN KEY: " + key);
}
Assert.assertTrue(this.duplicateFlags.containsKey(key));
final boolean value = this.duplicateFlags.get(key);
this.duplicateFlags.remove(key);
if (value != record.getDuplicateReadFlag()) {
System.err.println("Mismatching read:");
System.err.print(record.getSAMString());
}
Assert.assertEquals(record.getDuplicateReadFlag(), value);
Assert.assertEquals(record.getDuplicateReadFlag(), value, "Mismatching read: " + record.getSAMString());
if (testOpticalDuplicateDTTag && MarkDuplicates.DUPLICATE_TYPE_SEQUENCING.equals(record.getAttribute("DT"))) {
sequencingDTErrorsSeen.add(record.getReadName());
}
}
CloserUtil.close(reader);
}
// Ensure the program output the same number of records as were read in
Assert.assertEquals(outputRecords, this.getNumberOfRecords(), ("saw " + outputRecords + " output records, vs. " + this.getNumberOfRecords() + " input records"));
// Check the values written to metrics.txt against our input expectations
final MetricsFile<DuplicationMetrics, Comparable<?>> metricsOutput = new MetricsFile<DuplicationMetrics, Comparable<?>>();
final MetricsFile<DuplicationMetrics, Double> metricsOutput = new MetricsFile<>();
try{
metricsOutput.read(new FileReader(metricsFile));
}
catch (final FileNotFoundException ex) {
System.err.println("Metrics file not found: " + ex);
Assert.fail("Metrics file not found: " + ex.getMessage());
}
Assert.assertEquals(metricsOutput.getMetrics().size(), 1);
final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(0);
......@@ -197,6 +200,7 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
Assert.assertEquals(sequencingDTErrorsSeen.size(), expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match duplicate groups observed in the file");
Assert.assertEquals(sequencingDTErrorsSeen.size(), observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match duplicate groups observed in the file");
}
return metricsOutput;
} finally {
TestUtil.recursiveDelete(getOutputDir());
}
......
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam.markduplicates;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* This class defines the individual test cases to check the duplicate set histograms.
*
* @author hogstrom@broadinstitute.org
*/
public class MarkDuplicatesSetSizeHistogramTest extends AbstractMarkDuplicatesCommandLineProgramTest {

    @Override
    protected MarkDuplicatesSetSizeHistogramTester getTester() {
        return new MarkDuplicatesSetSizeHistogramTester();
    }

    @Test
    public void testSingleSet() {
        // Two read pairs with the same alignment start form one duplicate set of size 2.
        // The pair is an optical duplicate, so a size-2 entry is expected under
        // 'optical_sets' as well as 'all_sets'.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(1);
        final String representativeReadName = "RUNID:1:1:16020:13352";
        tester.addMatePair(representativeReadName, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);

        // Expected counts: key = (histogram label, histogram bin), value = histogram entry.
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "2.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "2.0"), 1.0);
        tester.runTest();
    }

    @Test
    public void testOpticalAndNonOpticalSet() {
        // Two optical duplicates plus one non-optical duplicate in the same set:
        // 'all_sets' sees a set of 4 reads, 'optical_sets' a set of 3.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(2);
        final String representativeReadName = "RUNID:1:1:16020:13352";
        tester.addMatePair(representativeReadName, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15994:13364", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);

        // One non-optical duplicate (far-away tile coordinates).
        tester.addMatePair("RUNID:1:1:25993:23361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);

        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "4.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "3.0"), 1.0);
        tester.runTest();
    }

    @Test
    public void testSingleton() {
        // A single non-duplicated read pair should produce a size-1 'all_sets' entry
        // and no optical entry.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(0);

        tester.addMatePair("RUNID:1:1:15993:13375", 1, 485255, 485255, false, false, false, false, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "1.0"), 1.0);
        tester.runTest();
    }

    @Test
    public void testMultiRepresentativeReadTags() {
        // Multiple duplicate sets of differing sizes plus a singleton, all in one run.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(3);

        // Duplicate set: size 2 - all optical.
        final String representativeReadName1 = "RUNID:1:1:16020:13352";
        tester.addMatePair(representativeReadName1, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "3.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "3.0"), 1.0);

        // Duplicate set: size 3 - all optical.
        final String representativeReadName2 = "RUNID:1:1:15993:13360";
        tester.addMatePair(representativeReadName2, 1, 485299, 485299, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13365", 1, 485299, 485299, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13370", 1, 485299, 485299, false, false, true, true, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "4.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "4.0"), 1.0);

        // Singleton (non-duplicate) read pair.
        tester.addMatePair("RUNID:1:1:15993:13375", 1, 485255, 485255, false, false, false, false, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "1.0"), 1.0);
        tester.runTest();
    }
}
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam.markduplicates;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import picard.cmdline.CommandLineProgram;
import picard.sam.DuplicationMetrics;
import java.io.IOException;
import java.util.*;
/**
 * An extension of AbstractMarkDuplicatesCommandLineProgramTester that, after running the
 * standard metrics checks, validates the duplicate-set-size histogram entries written by
 * MarkDuplicates against a caller-supplied map of expected (label, bin) values.
 * (The previous javadoc claimed this tested MarkDuplicatesWithMateCigar, but
 * {@link #getProgram()} returns {@code MarkDuplicates}.)
 */
public class MarkDuplicatesSetSizeHistogramTester extends AbstractMarkDuplicatesCommandLineProgramTester {
    // key = (histogram label, histogram bin), value = expected histogram entry
    final public Map<List<String>, Double> expectedSetSizeMap = new HashMap<>();

    @Override
    protected CommandLineProgram getProgram() {
        return new MarkDuplicates();
    }

    @Override
    public void test() throws IOException {
        final MetricsFile<DuplicationMetrics, Double> metricsOutput = testMetrics();

        // Check each expected (label, bin) pair against the histograms actually
        // written to the metrics file. Fail if none of the expectations matched,
        // which would mean the expected histogram/bin never appeared at all.
        if (!expectedSetSizeMap.isEmpty()) {
            boolean checked = false;
            for (final Histogram<Double> histo : metricsOutput.getAllHistograms()) {
                final String label = histo.getValueLabel();
                for (final Double bin : histo.keySet()) {
                    final String binStr = String.valueOf(bin);
                    final List<String> labelBinStr = Arrays.asList(label, binStr);
                    if (expectedSetSizeMap.containsKey(labelBinStr)) {
                        checked = true;
                        final Histogram.Bin<Double> binValue = histo.get(bin);
                        final double actual = binValue.getValue();
                        final double expected = expectedSetSizeMap.get(labelBinStr);
                        Assert.assertEquals(actual, expected);
                    }
                }
            }
            if (!checked) {
                // Fixed message: was "Could not not find matching entry ..."
                Assert.fail("Could not find a matching entry for expectedSetSizeMap in metrics.");
            }
        }
    }

    @AfterClass
    public void afterTest() {
        // Clean up the test's output directory once all tests in the class have run.
        TestUtil.recursiveDelete(getOutputDir());
    }
}