Skip to content
Commits on Source (6)
FROM broadinstitute/java-baseimage
FROM openjdk:8
MAINTAINER Broad Institute DSDE <dsde-engineering@broadinstitute.org>
ARG build_command=shadowJar
......
......@@ -64,7 +64,7 @@ def ensureBuildPrerequisites(requiredJavaVersion, buildPrerequisitesMessage) {
}
ensureBuildPrerequisites(requiredJavaVersion, buildPrerequisitesMessage)
final htsjdkVersion = System.getProperty('htsjdk.version', '2.16.1')
final htsjdkVersion = System.getProperty('htsjdk.version', '2.18.2')
// We use a custom shaded build of the NIO library to avoid a regression in the authentication layer.
// GATK does the same, see https://github.com/broadinstitute/gatk/issues/3591
......
picard-tools (2.18.25+dfsg-1) unstable; urgency=medium
* New upstream release
-- Olivier Sallou <osallou@debian.org> Fri, 25 Jan 2019 10:46:36 +0000
picard-tools (2.18.23+dfsg-2) unstable; urgency=medium
* Fix Class-Path in manifest file for dependencies (Closes: #919559).
......
......@@ -19,7 +19,7 @@ Build-Depends: default-jdk (>= 2:1.9~),
libgkl-java,
libgatk-native-bindings-java,
# htsjdk and picard-tools are released nearly together
libhtsjdk-java (>= 2.16.1~),
libhtsjdk-java (>= 2.18.2~),
# required for tests:
testng (>= 6.9.10),
r-base-core,
......
......@@ -2,7 +2,7 @@ Description: do not use shadowjar
Author: Sascha Steinbiss <satta@debian.org>
--- a/build.gradle
+++ b/build.gradle
@@ -126,7 +126,7 @@ group = 'com.github.broadinstitute'
@@ -126,7 +126,7 @@
defaultTasks 'all'
......@@ -11,7 +11,7 @@ Author: Sascha Steinbiss <satta@debian.org>
// Source file names for the picard command line properties file. We select and include only one of
// these two files in each jar, renamed to "picardCmdLine.properties", depending on which parser we
@@ -214,6 +214,7 @@ task picardDoc(type: Javadoc, dependsOn:
@@ -213,6 +213,7 @@
options.addStringOption("verbose")
}
......@@ -19,7 +19,7 @@ Author: Sascha Steinbiss <satta@debian.org>
task currentJar(type: Copy){
from shadowJar
into new File(buildDir, "libs")
@@ -231,8 +232,7 @@ shadowJar {
@@ -230,8 +231,7 @@
}
}
}
......
......@@ -43,7 +43,7 @@ Forwarded: no
* 4 cases tested:
--- a/src/test/java/picard/util/LiftoverVcfTest.java
+++ b/src/test/java/picard/util/LiftoverVcfTest.java
@@ -1196,30 +1196,4 @@
@@ -1187,30 +1187,4 @@
Assert.assertEquals(runPicardCommandLine(args), 1);
}
......
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam;
import htsjdk.samtools.*;
import htsjdk.samtools.util.*;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
@CommandLineProgramProperties(
        summary = AddOATag.USAGE_DETAILS,
        oneLineSummary = AddOATag.USAGE_SUMMARY,
        programGroup = ReadDataManipulationProgramGroup.class)
@DocumentedFeature
public class AddOATag extends CommandLineProgram {

    static final String USAGE_SUMMARY = "Record current alignment information to OA tag.";
    static final String USAGE_DETAILS = "This tool takes in an aligned SAM or BAM and adds the " +
            "OA tag to every aligned read unless an interval list is specified, where it only adds the tag to reads " +
            "that fall within the intervals in the interval list. This can be useful if you are about to realign but want " +
            "to keep the original alignment information as a separate tag." +
            "<br />"+
            "<h4>Usage example:</h4>" +
            "<pre>" +
            "java -jar picard.jar AddOATag \\<br />" +
            "      L=some_picard.interval_list \\<br />" +
            "      I=sorted.bam \\<br />" +
            "      O=fixed.bam <br />"+
            "</pre>";

    @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "SAM or BAM input file")
    public String INPUT;

    @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "SAM or BAM file to write merged result to")
    public String OUTPUT;

    @Argument(shortName = "L", doc = "If provided, only records that overlap given interval list will have the OA tag added.", optional = true)
    public File INTERVAL_LIST;

    /**
     * Original Alignment tag key.
     */
    public static final String OA = "OA";

    private static final Log log = Log.getInstance(AddOATag.class);

    /**
     * Streams the input SAM/BAM, sets the OA tag on each record that overlaps the optional
     * interval list (or on every record when no interval list was supplied), and writes
     * every record — tagged or not — to OUTPUT.
     *
     * @return 0 on success, 1 if an I/O error occurs while opening or copying the files.
     */
    @Override
    protected int doWork() {
        try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(IOUtil.getPath(INPUT));
             final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, IOUtil.getPath(OUTPUT), REFERENCE_SEQUENCE)) {
            writer.setProgressLogger(
                    new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

            // Typed detector (was a raw OverlapDetector, which produced an unchecked warning);
            // null when no interval list was supplied, in which case every record is tagged.
            final OverlapDetector<Interval> overlapDetector = getOverlapDetectorFromIntervalListFile(INTERVAL_LIST, 0, 0);
            for (final SAMRecord rec : reader) {
                if (overlapDetector == null || overlapDetector.overlapsAny(rec)) {
                    setOATag(rec);
                }
                writer.addAlignment(rec);
            }
        } catch (IOException e) {
            log.error(e);
            return 1;
        }

        return 0;
    }

    /**
     * Takes an interval list file and converts it to an overlap detector; can add left and
     * right padding via the buffer arguments.
     *
     * @param intervalList interval list file, may be null (in which case null is returned)
     * @param lhsBuffer    left padding applied by the detector
     * @param rhsBuffer    right padding applied by the detector
     * @return an overlap detector over the uniqued intervals, or null when intervalList is null
     */
    static OverlapDetector<Interval> getOverlapDetectorFromIntervalListFile(File intervalList, int lhsBuffer, int rhsBuffer) {
        if (intervalList == null) {
            return null;
        }
        List<Interval> intervals = IntervalList.fromFile(intervalList).uniqued().getIntervals();
        OverlapDetector<Interval> detector = new OverlapDetector<>(lhsBuffer, rhsBuffer);
        detector.addAll(intervals, intervals);
        return detector;
    }

    // Format the OA tag string according to the spec and append it to any existing OA value.
    // A comma in the reference name would corrupt the comma-delimited OA fields, so it is rejected.
    //TODO: Move this to htsjdk once https://github.com/samtools/hts-specs/pull/193 is merged
    private void setOATag(SAMRecord rec) {
        if (rec.getReferenceName().contains(",")) {
            throw new PicardException(String.format("Reference name for record %s contains a comma character.", rec.getReadName()));
        }
        final String oaValue;
        if (rec.getReadUnmappedFlag()) {
            // Unmapped reads get placeholder fields: "*" ref, pos 0, "*" CIGAR, MAPQ 255.
            oaValue = String.format("*,0,%s,*,255,;", rec.getReadNegativeStrandFlag() ? "-" : "+");
        } else {
            oaValue = String.format("%s,%s,%s,%s,%s,%s;",
                    rec.getReferenceName(),
                    rec.getAlignmentStart(),
                    rec.getReadNegativeStrandFlag() ? "-" : "+",
                    rec.getCigarString(),
                    rec.getMappingQuality(),
                    Optional.ofNullable(rec.getAttribute(SAMTag.NM.name())).orElse(""));
        }
        // OA is cumulative: append to any value already present rather than overwriting.
        rec.setAttribute(OA, Optional.ofNullable(rec.getAttribute(OA)).orElse("") + oaValue);
    }
}
......@@ -756,23 +756,14 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
if (firstOfNextChunk != null && areComparableForDuplicates(firstOfNextChunk, next, true, useBarcodes)) {
nextChunk.add(next);
} else {
if (nextChunk.size() > 1) {
markDuplicatePairs(nextChunk);
if (TAG_DUPLICATE_SET_MEMBERS) {
addRepresentativeReadIndex(nextChunk);
}
}
handleChunk(nextChunk);
nextChunk.clear();
nextChunk.add(next);
firstOfNextChunk = next;
}
}
if (nextChunk.size() > 1) {
markDuplicatePairs(nextChunk);
if (TAG_DUPLICATE_SET_MEMBERS) {
addRepresentativeReadIndex(nextChunk);
}
}
handleChunk(nextChunk);
this.pairSort.cleanup();
this.pairSort = null;
......@@ -813,6 +804,17 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
}
}
/**
 * Processes one accumulated chunk of comparable read ends: multi-element chunks get
 * duplicate-marked (and, when requested, tagged with their representative read index),
 * while a lone element is counted as a duplicate set of size one.
 */
private void handleChunk(List<ReadEndsForMarkDuplicates> nextChunk) {
    final int chunkSize = nextChunk.size();
    if (chunkSize == 1) {
        // Singleton: nothing to mark, but it still contributes to the set-size histograms.
        addSingletonToCount(libraryIdGenerator);
    } else if (chunkSize > 1) {
        markDuplicatePairs(nextChunk);
        if (TAG_DUPLICATE_SET_MEMBERS) {
            addRepresentativeReadIndex(nextChunk);
        }
    }
}
private boolean areComparableForDuplicates(final ReadEndsForMarkDuplicates lhs, final ReadEndsForMarkDuplicates rhs, final boolean compareRead2, final boolean useBarcodes) {
boolean areComparable = lhs.libraryId == rhs.libraryId;
......
......@@ -188,6 +188,11 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
file.setHistogram(metricsByLibrary.values().iterator().next().calculateRoiHistogram());
}
// Add set size histograms - the set size counts are printed on adjacent columns to the ROI metric.
file.addHistogram(libraryIdGenerator.getDuplicateCountHist());
file.addHistogram(libraryIdGenerator.getOpticalDuplicateCountHist());
file.addHistogram(libraryIdGenerator.getNonOpticalDuplicateCountHist());
file.write(METRICS_FILE);
}
......@@ -260,6 +265,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
}
// Check if we need to partition since the orientations could have changed
final int nOpticalDup;
if (hasFR && hasRF) { // need to track them independently
// Variables used for optical duplicate detection and tracking
final List<ReadEnds> trackOpticalDuplicatesF = new ArrayList<>();
......@@ -277,11 +283,26 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
}
// track the duplicates
trackOpticalDuplicates(trackOpticalDuplicatesF, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
trackOpticalDuplicates(trackOpticalDuplicatesR, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
final int nOpticalDupF = trackOpticalDuplicates(trackOpticalDuplicatesF,
keeper,
opticalDuplicateFinder,
libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
final int nOpticalDupR = trackOpticalDuplicates(trackOpticalDuplicatesR,
keeper,
opticalDuplicateFinder,
libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
nOpticalDup = nOpticalDupF + nOpticalDupR;
} else { // No need to partition
AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(ends, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
nOpticalDup = trackOpticalDuplicates(ends, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
}
trackDuplicateCounts(ends.size(),
nOpticalDup,
libraryIdGenerator);
}
/**
 * Records a duplicate set of size one: the read is counted in the overall set-size
 * histogram and in the non-optical histogram (a singleton cannot be an optical duplicate).
 */
public static void addSingletonToCount(final LibraryIdGenerator libraryIdGenerator) {
    final double singletonSetSize = 1.0;
    libraryIdGenerator.getDuplicateCountHist().increment(singletonSetSize);
    libraryIdGenerator.getNonOpticalDuplicateCountHist().increment(singletonSetSize);
}
/**
......@@ -294,7 +315,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
* optical duplicate detection, we do not consider them duplicates if one read as FR and the other RF when we order orientation by the
* first mate sequenced (read #1 of the pair).
*/
private static void trackOpticalDuplicates(final List<? extends ReadEnds> list,
private static int trackOpticalDuplicates(final List<? extends ReadEnds> list,
final ReadEnds keeper,
final OpticalDuplicateFinder opticalDuplicateFinder,
final Histogram<Short> opticalDuplicatesByLibraryId) {
......@@ -311,5 +332,23 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
if (opticalDuplicates > 0) {
opticalDuplicatesByLibraryId.increment(list.get(0).getLibraryId(), opticalDuplicates);
}
return opticalDuplicates;
}
/**
 * Folds one completed duplicate set into the library's set-size histograms.
 *
 * @param listSize           total number of read ends in the duplicate set
 * @param optDupCnt          how many of those were flagged as optical duplicates
 * @param libraryIdGenerator holder of the per-library histograms being updated
 */
private static void trackDuplicateCounts(final int listSize,
                                         final int optDupCnt,
                                         final LibraryIdGenerator libraryIdGenerator) {
    final Histogram<Double> duplicatesCountHist = libraryIdGenerator.getDuplicateCountHist();
    final Histogram<Double> nonOpticalDuplicatesCountHist = libraryIdGenerator.getNonOpticalDuplicateCountHist();
    final Histogram<Double> opticalDuplicatesCountHist = libraryIdGenerator.getOpticalDuplicateCountHist();
    // Every set contributes its full size to the overall histogram.
    duplicatesCountHist.increment((double) listSize);
    // Non-optical bucket only gets an entry when something remains after removing optical dups.
    if ((listSize - optDupCnt) > 0) {
        nonOpticalDuplicatesCountHist.increment((double) (listSize - optDupCnt));
    }
    if (optDupCnt > 0) {
        // NOTE(review): the optical set size is recorded as optDupCnt + 1 — presumably so the
        // kept (representative) read is included in the set size; confirm against the metrics doc.
        opticalDuplicatesCountHist.increment((double) (optDupCnt + 1.0));
    }
}
}
......@@ -47,9 +47,11 @@ public class LibraryIdGenerator {
private final SAMFileHeader header;
private final Map<String, Short> libraryIds = new HashMap<String, Short>(); // from library string to library id
private short nextLibraryId = 1;
private final Map<String, DuplicationMetrics> metricsByLibrary = new HashMap<String, DuplicationMetrics>();
private final Histogram<Short> opticalDuplicatesByLibraryId = new Histogram<Short>();
private final Map<String, DuplicationMetrics> metricsByLibrary = new HashMap<>();
private final Histogram<Short> opticalDuplicatesByLibraryId = new Histogram<>();
private final Histogram<Double> duplicateCountHist = new Histogram<>("set_size", "all_sets");
private final Histogram<Double> nonOpticalDuplicateCountHist = new Histogram<>("set_size", "non_optical_sets");
private final Histogram<Double> opticalDuplicateCountHist = new Histogram<>("set_size", "optical_sets");
public LibraryIdGenerator(final SAMFileHeader header) {
this.header = header;
......@@ -65,13 +67,31 @@ public class LibraryIdGenerator {
}
}
public Map<String, Short> getLibraryIdsMap() { return this.libraryIds; }
public Map<String, Short> getLibraryIdsMap() {
return this.libraryIds;
}
public Map<String, DuplicationMetrics> getMetricsByLibraryMap() {
return this.metricsByLibrary;
}
public Histogram<Short> getOpticalDuplicatesByLibraryIdMap() {
return this.opticalDuplicatesByLibraryId;
}
public Histogram<Double> getDuplicateCountHist() {
return this.duplicateCountHist;
}
public Map<String, DuplicationMetrics> getMetricsByLibraryMap() { return this.metricsByLibrary; }
public Histogram<Double> getNonOpticalDuplicateCountHist() {
return this.nonOpticalDuplicateCountHist;
}
public Histogram<Short> getOpticalDuplicatesByLibraryIdMap() { return this.opticalDuplicatesByLibraryId; }
public Histogram<Double> getOpticalDuplicateCountHist() {
return this.opticalDuplicateCountHist;
}
public static String getReadGroupLibraryName(SAMReadGroupRecord readGroup) {
public static String getReadGroupLibraryName(final SAMReadGroupRecord readGroup) {
return Optional.ofNullable(readGroup.getLibrary())
.orElse(UNKNOWN_LIBRARY);
}
......
......@@ -28,9 +28,10 @@ import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import picard.sam.util.PhysicalLocation;
import picard.sam.util.ReadNameParser;
import picard.util.GraphUtils;
import java.io.Serializable;
import java.util.List;
import java.util.*;
/**
* Contains methods for finding optical/co-localized/sequencing duplicates.
......@@ -83,7 +84,6 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
}
/**
*
* @param readNameRegex see {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
* @param opticalDuplicatePixelDistance the optical duplicate pixel distance
* @param log the log to which to write messages.
......@@ -94,7 +94,6 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
}
/**
*
* @param readNameRegex see {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
* @param opticalDuplicatePixelDistance the optical duplicate pixel distance
* @param maxDuplicateSetSize the size of a set that is too big to process
......@@ -130,7 +129,6 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
return opticalDuplicateFlags;
}
final int distance = this.opticalDuplicatePixelDistance;
final PhysicalLocation actualKeeper = keeperOrNull(list, keeper);
final Log log;
......@@ -149,13 +147,26 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
progressLoggerForKeeper = null;
progressLoggerForRest = null;
}
if (length >= (keeper == null ? 3 : 4)) {
return getOpticalDuplicatesFlagWithGraph(list, actualKeeper, opticalDuplicateFlags, log, progressLoggerForKeeper, progressLoggerForRest, logProgress);
} else {
return getOpticalDuplicatesFlagFast(list, actualKeeper, opticalDuplicateFlags, log, progressLoggerForKeeper, progressLoggerForRest, logProgress);
}
}
/**
* Compute optical duplicates quickly in the standard case where we know that there won't be any transitive distances to worry about.
*
* Note, this is guaranteed to be correct when there are at most 2x reads from a readgroup or 3x with the keeper present
*/
private boolean[] getOpticalDuplicatesFlagFast(List<? extends PhysicalLocation> list, PhysicalLocation actualKeeper, boolean[] opticalDuplicateFlags, Log log, ProgressLogger progressLoggerForKeeper, ProgressLogger progressLoggerForRest, boolean logProgress) {
final int length = list.size();
// First go through and compare all the reads to the keeper
if (actualKeeper != null) {
for (int i = 0; i < length; ++i) {
final PhysicalLocation other = list.get(i);
opticalDuplicateFlags[i] = closeEnough(actualKeeper, other, distance);
opticalDuplicateFlags[i] = closeEnough(actualKeeper, other, this.opticalDuplicatePixelDistance);
// The main point of adding this log and if statement (also below) is a workaround a bug in the JVM
// which causes a deep exception (https://github.com/broadinstitute/picard/issues/472).
// It seems that this is related to https://bugs.openjdk.java.net/browse/JDK-8033717 which
......@@ -163,25 +174,21 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
// every time we tried to duplicate-mark it. The problem seemed to be a duplicate-set of size 500,000,
// and this loop seemed to kill the JVM for some reason. This logging statement (and the one in the
// loop below) solved the problem.
if (logProgress) progressLoggerForKeeper.record(String.format("%d", other.getReadGroup()), other.getX());
}
}
if (logProgress) log.debug("Done with comparing to keeper, now the rest.");
// Now go through and do each pairwise comparison not involving the actualKeeper
for (int i = 0; i < length; ++i) {
final PhysicalLocation lhs = list.get(i);
if (lhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
// logging here for same reason as above
if (logProgress) progressLoggerForRest.record(String.format("%d", lhs.getReadGroup()), lhs.getX());
for (int j = i + 1; j < length; ++j) {
final PhysicalLocation rhs = list.get(j);
if (rhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j]) continue; // both already marked, no need to check
if (closeEnough(lhs, rhs, distance)) {
if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j])
continue; // both already marked, no need to check
if (closeEnough(lhs, rhs, this.opticalDuplicatePixelDistance)) {
// At this point we want to mark either lhs or rhs as duplicate. Either could have been marked
// as a duplicate of the keeper (but not both - that's checked above), so be careful about which
// one to now mark as a duplicate.
......@@ -190,10 +197,123 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
}
}
}
return opticalDuplicateFlags;
}
/**
* Compute the optical duplicates correctly in the case where the duplicate group could end up with transitive optical duplicates
*/
/**
 * Computes optical-duplicate flags for a duplicate set by building a proximity graph
 * (edges join reads within the optical pixel distance, per read group + tile) and
 * clustering it; within each cluster one representative is kept un-flagged — the keeper
 * when present, otherwise the read with the minimal (x, y) coordinate — and every other
 * cluster member is flagged in {@code opticalDuplicateFlags}.
 *
 * @return the (mutated) opticalDuplicateFlags array
 */
private boolean[] getOpticalDuplicatesFlagWithGraph(List<? extends PhysicalLocation> list, PhysicalLocation keeper, boolean[] opticalDuplicateFlags, Log log, ProgressLogger progressLoggerForKeeper, ProgressLogger progressLoggerForRest, boolean logProgress) {
    // Make a graph where the edges are reads that lie within the optical duplicate pixel
    // distance from each other; we then cluster the graph (union-find) to find optical
    // duplicate groups.
    final GraphUtils.Graph<Integer> opticalDistanceRelationGraph = new GraphUtils.Graph<>();
    if (logProgress) {
        log.debug("Building adjacency graph for duplicate group");
    }
    // Bucket read indices by (read group, tile): only reads sharing both can be optical
    // duplicates of one another, so edges are only drawn within a bucket.
    final Map<Integer, List<Integer>> tileRGmap = new HashMap<>();
    int keeperIndex = -1;
    for (int i = 0; i < list.size(); i++) {
        PhysicalLocation currentLoc = list.get(i);
        if (currentLoc == keeper) {
            keeperIndex = i;
        }
        if (currentLoc.hasLocation()) {
            // Composite key: read group in the high 16 bits, tile in the low bits.
            final int key = ((int) currentLoc.getReadGroup() << 16) + currentLoc.getTile();
            if (tileRGmap.containsKey(key)) {
                tileRGmap.get(key).add(i);
            } else {
                final List<Integer> pLocation = new ArrayList<>();
                pLocation.add(i);
                tileRGmap.put(key, pLocation);
            }
        }
        // Every read becomes a node, even those without location info (they stay isolated).
        opticalDistanceRelationGraph.addNode(i);
    }
    // Finding adjacent optical duplicates is O(n^2), so subdividing the input by
    // (read group, tile) avoids redundant cross-group comparisons.
    for (List<Integer> tileGroup : tileRGmap.values()) {
        if (tileGroup.size() > 1) {
            fillGraphFromAGroup(list, tileGroup, logProgress, progressLoggerForKeeper, this.opticalDuplicatePixelDistance, opticalDistanceRelationGraph);
        }
    }
    if (logProgress) {
        log.debug("Finished building adjacency graph for duplicate group, moving onto clustering");
    }
    // Keep a map of the reads and their cluster assignments
    final Map<Integer, Integer> opticalDuplicateClusterMap = opticalDistanceRelationGraph.cluster();
    final Map<Integer, Integer> clusterToRepresentativeRead = new HashMap<>();
    Integer keeperCluster = null;
    // Pre-seed the keeper as its cluster's representative so it is never flagged.
    if (keeperIndex >= 0) {
        clusterToRepresentativeRead.put(opticalDuplicateClusterMap.get(keeperIndex), keeperIndex);
        keeperCluster = opticalDuplicateClusterMap.get(keeperIndex);
    }
    for (final Map.Entry<Integer, Integer> entry : opticalDuplicateClusterMap.entrySet()) {
        // logging here for same reason as above (JVM workaround in fillGraphFromAGroup)
        final int recordIndex = entry.getKey();
        final int recordAssignedCluster = entry.getValue();
        if (logProgress) {
            progressLoggerForRest.record(String.format("%d", list.get(recordIndex).getReadGroup()), list.get(recordIndex).getX());
        }
        // If it's not the first read we've seen for this cluster, mark it as an optical duplicate
        if (clusterToRepresentativeRead.containsKey(recordAssignedCluster) && recordIndex != keeperIndex) {
            final PhysicalLocation representativeLoc = list.get(clusterToRepresentativeRead.get(recordAssignedCluster));
            final PhysicalLocation currentRecordLoc = list.get(recordIndex);
            // If not in the keeper cluster, keep the minX -> minY valued duplicate as the
            // representative (note the tile must be equal for reads to cluster together).
            if (!(keeperIndex >= 0 && recordAssignedCluster == keeperCluster) && // checking we don't accidentally set the keeper as an optical duplicate
                    (currentRecordLoc.getX() < representativeLoc.getX() || (currentRecordLoc.getX() == representativeLoc.getX() && currentRecordLoc.getY() < representativeLoc.getY()))) {
                // Mark the old min as an optical duplicate, and save the new min
                opticalDuplicateFlags[clusterToRepresentativeRead.get(recordAssignedCluster)] = true;
                clusterToRepresentativeRead.put(recordAssignedCluster, recordIndex);
            } else {
                // A smaller (or keeper) read already represents this cluster: flag this one.
                opticalDuplicateFlags[recordIndex] = true;
            }
        } else {
            // First read seen for this cluster becomes its provisional representative.
            clusterToRepresentativeRead.put(recordAssignedCluster, recordIndex);
        }
    }
    return opticalDuplicateFlags;
}
/**
 * Adds an edge to {@code opticalDistanceRelationGraph} for every pair of reads in
 * {@code groupList} (indices into {@code wholeList}) whose physical locations lie within
 * {@code distance} pixels of each other.
 */
private void fillGraphFromAGroup(final List<? extends PhysicalLocation> wholeList, final List<Integer> groupList, final boolean logProgress, final ProgressLogger progressLoggerForKeeper, final int distance, final GraphUtils.Graph<Integer> opticalDistanceRelationGraph) {
    final int groupSize = groupList.size();
    for (int outer = 0; outer < groupSize; outer++) {
        final int lhsIndex = groupList.get(outer);
        final PhysicalLocation lhs = wholeList.get(lhsIndex);

        // This progress record works around a JVM problem seen on huge duplicate sets
        // (https://github.com/broadinstitute/picard/issues/472, possibly related to
        // https://bugs.openjdk.java.net/browse/JDK-8033717): a ~500,000-read duplicate set
        // reproducibly killed the JVM until these logging statements were added.
        if (logProgress) {
            progressLoggerForKeeper.record(String.format("%d", lhs.getReadGroup()), lhs.getX());
        }

        for (int inner = outer + 1; inner < groupSize; inner++) {
            final int rhsIndex = groupList.get(inner);
            final PhysicalLocation rhs = wholeList.get(rhsIndex);
            if (closeEnoughShort(lhs, rhs, distance)) {
                opticalDistanceRelationGraph.addEdge(lhsIndex, rhsIndex);
            }
        }
    }
}
/** Returns the keeper if it is contained within the list and has location information, otherwise null. */
private PhysicalLocation keeperOrNull(final List<? extends PhysicalLocation> list, final PhysicalLocation keeper) {
if (keeper != null && keeper.hasLocation()) {
......@@ -213,4 +333,10 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
Math.abs(lhs.getX() - rhs.getX()) <= distance &&
Math.abs(lhs.getY() - rhs.getY()) <= distance;
}
/**
 * Returns true when {@code lhs} and {@code rhs} are distinct objects whose X and Y pixel
 * coordinates each differ by at most {@code distance}.
 */
private boolean closeEnoughShort(final PhysicalLocation lhs, final PhysicalLocation rhs, final int distance) {
    if (lhs == rhs) {
        return false; // a read is never an optical duplicate of itself
    }
    return Math.abs(lhs.getX() - rhs.getX()) <= distance
            && Math.abs(lhs.getY() - rhs.getY()) <= distance;
}
}
......@@ -8,6 +8,6 @@ import org.broadinstitute.barclay.argparser.Argument;
public class PGTagArgumentCollection {
@Argument(doc = "Add PG tag to each read in a SAM or BAM", common = true)
public Boolean ADD_PG_TAG_TO_READS = true;
public boolean ADD_PG_TAG_TO_READS = true;
}
\ No newline at end of file
......@@ -34,10 +34,14 @@ import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.lang3.ArrayUtils;
import picard.vcf.LiftoverVcf;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class LiftoverUtils {
/**
......@@ -52,13 +56,14 @@ public class LiftoverUtils {
public static final Collection<String> DEFAULT_TAGS_TO_REVERSE = Arrays.asList("AF");
public static final Collection<String> DEFAULT_TAGS_TO_DROP = Arrays.asList("MAX_AF");
public static final Log log = Log.getInstance(LiftoverUtils.class);
/**
* This will take an input VariantContext and lift to the provided interval.
* If this interval is in the opposite orientation, all alleles and genotypes will be reverse complemented
* and indels will be left-aligned. Currently this is only able to invert biallelic indels, and null will be
* returned for any unsupported VC.
*
* @param source The VariantContext to lift
* @param target The target interval
* @param refSeq The reference sequence, which should match the target interval
......@@ -112,6 +117,7 @@ public class LiftoverUtils {
* This is a utility method that will convert a list of alleles into a list of base strings. Reference status
* is ignored when creating these strings (i.e. 'A', not 'A*'). These strings should be sufficient
* to recreate an Allele using Allele.create()
*
* @param alleles The list of alleles
* @return A list of strings representing the bases of the input alleles.
*/
......@@ -140,18 +146,14 @@ public class LiftoverUtils {
throw new IllegalArgumentException("This should only be called for negative strand liftovers");
}
// not currently supported
if (source.isIndel() && !source.isBiallelic()) {
return null;
}
final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
final VariantContextBuilder vcb = new VariantContextBuilder(source);
vcb.chr(target.getContig());
// By convention, indels are left aligned and include the base prior to that indel.
// When reverse complementing, will be necessary to include this additional base.
// This check prevents the presumably rare situation in which the indel occurs on the first position of the sequence
// This check prevents the extremely rare situation in which the indel occurs on the
// first base of the sequence
final boolean addToStart = source.isIndel() && target.getStart() > 1;
final int start = target.getStart() - (addToStart ? 1 : 0);
......@@ -162,7 +164,7 @@ public class LiftoverUtils {
vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, source.isIndel(), addToStart));
if (source.isIndel()) {
if (!source.isSNP()) {
// check that the reverse complemented bases match the new reference
if (!referenceAlleleMatchesReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
return null;
......@@ -175,22 +177,21 @@ public class LiftoverUtils {
return vcb;
}
private static List<Allele> reverseComplementAlleles(final List<Allele> originalAlleles, final Interval target, final ReferenceSequence refSeq, final boolean isBiAllelicIndel, final boolean addToStart){
private static List<Allele> reverseComplementAlleles(final List<Allele> originalAlleles, final Interval target, final ReferenceSequence refSeq, final boolean isIndel, final boolean addToStart) {
final List<Allele> alleles = new ArrayList<>();
for (final Allele oldAllele : originalAlleles) {
alleles.add(LiftoverUtils.reverseComplement(oldAllele, target, refSeq, isBiAllelicIndel, addToStart));
alleles.add(LiftoverUtils.reverseComplement(oldAllele, target, refSeq, isIndel, addToStart));
}
return alleles;
}
private static Allele reverseComplement(final Allele oldAllele, final Interval target, final ReferenceSequence referenceSequence, final boolean isBiAllelicIndel, final boolean addToStart){
private static Allele reverseComplement(final Allele oldAllele, final Interval target, final ReferenceSequence referenceSequence, final boolean isIndel, final boolean addToStart) {
if (oldAllele.isSymbolic() || oldAllele.isNoCall()) {
return oldAllele;
}
else if (isBiAllelicIndel) {
} else if (isIndel) {
// target.getStart is 1-based, reference bases are 0-based
final StringBuilder alleleBuilder = new StringBuilder(target.getEnd() - target.getStart() + 1);
......@@ -344,7 +345,6 @@ public class LiftoverUtils {
*
* Based on Adrian Tan, Gon&ccedil;alo R. Abecasis and Hyun Min Kang. (2015)
* Unified Representation of Genetic Variants. Bioinformatics.
*
*/
protected static void leftAlignVariant(final VariantContextBuilder builder, final int start, final int end, final List<Allele> alleles, final ReferenceSequence referenceSequence) {
......@@ -422,7 +422,9 @@ public class LiftoverUtils {
.collect(Collectors.toMap(Map.Entry::getKey, me -> Allele.create(me.getValue(), me.getKey().isReference())));
//retain original order:
List<Allele> fixedAlleles = alleles.stream().map(fixedAlleleMap::get).collect(Collectors.toList());
List<Allele> fixedAlleles = alleles.stream()
.map(fixedAlleleMap::get)
.collect(Collectors.toList());
builder.alleles(fixedAlleles);
}
......
......@@ -31,7 +31,9 @@ import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.*;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
......@@ -49,6 +51,7 @@ import java.io.File;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;
import java.util.stream.Collectors;
/**
* <h3>Summary</h3>
......@@ -184,7 +187,7 @@ public class LiftoverVcf extends CommandLineProgram {
/**
* Filter name to use when a target cannot be lifted over.
*/
public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel";
public static final String FILTER_CANNOT_LIFTOVER_REV_COMP = "CannotLiftOver";
/**
* Filter name to use when a target cannot be lifted over.
......@@ -206,7 +209,7 @@ public class LiftoverVcf extends CommandLineProgram {
* Filters to be added to the REJECT file.
*/
private static final List<VCFFilterHeaderLine> FILTERS = CollectionUtil.makeList(
new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_INDEL, "Indel falls into a reverse complemented region in the target genome."),
new VCFFilterHeaderLine(FILTER_CANNOT_LIFTOVER_REV_COMP, "Liftover of a variant that needed reverse-complementing failed for unknown reasons."),
new VCFFilterHeaderLine(FILTER_NO_TARGET, "Variant could not be lifted between genome builds."),
new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover."),
new VCFFilterHeaderLine(FILTER_INDEL_STRADDLES_TWO_INTERVALS, "Reference allele in Indel is straddling multiple intervals in the chain, and so the results are not well defined.")
......@@ -232,6 +235,11 @@ public class LiftoverVcf extends CommandLineProgram {
*/
public static final String ATTEMPTED_LOCUS = "AttemptedLocus";
/**
* Attribute used to store the position of the failed variant on the target contig prior to finding out that alleles do not match.
*/
public static final String ATTEMPTED_ALLELES = "AttemptedAlleles";
/**
* Metadata to be added to the Passing file.
*/
......@@ -324,9 +332,12 @@ public class LiftoverVcf extends CommandLineProgram {
.modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
.build();
final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
for (final VCFFilterHeaderLine line : FILTERS) {
rejectHeader.addMetaDataLine(line);
}
rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS,1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to mismatching alleles."));
rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_LOCUS, 1, VCFHeaderLineType.String, "The locus of the variant in the TARGET prior to failing due to reference allele mismatching to the target reference."));
rejectHeader.addMetaDataLine(new VCFInfoHeaderLine(ATTEMPTED_ALLELES, 1, VCFHeaderLineType.String, "The alleles of the variant in the TARGET prior to failing due to reference allele mismatching to the target reference."));
rejects.writeHeader(rejectHeader);
......@@ -369,11 +380,7 @@ public class LiftoverVcf extends CommandLineProgram {
final ReferenceSequence refSeq;
// if the target is null OR (the target is reverse complemented AND the variant is a non-biallelic indel or mixed), then we cannot lift it over
if (target.isNegativeStrand() && (ctx.isMixed() || ctx.isIndel() && !ctx.isBiallelic())) {
rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_INDEL);
} else if (!refSeqs.containsKey(target.getContig())) {
if (!refSeqs.containsKey(target.getContig())) {
rejectVariant(ctx, FILTER_NO_TARGET);
final String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
......@@ -389,7 +396,7 @@ public class LiftoverVcf extends CommandLineProgram {
final VariantContext liftedVC = LiftoverUtils.liftVariant(ctx, target, refSeq, WRITE_ORIGINAL_POSITION, WRITE_ORIGINAL_ALLELES);
// the liftedVC can be null if the liftover fails because of a problem with reverse complementing
if (liftedVC == null) {
rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_INDEL);
rejectVariant(ctx, FILTER_CANNOT_LIFTOVER_REV_COMP);
} else {
tryToAddVariant(liftedVC, refSeq, ctx);
}
......@@ -479,7 +486,6 @@ public class LiftoverVcf extends CommandLineProgram {
* @param vc new {@link VariantContext}
* @param refSeq {@link ReferenceSequence} of new reference
* @param source the original {@link VariantContext} to use for putting the original location information into vc
* @return true if successful, false if failed due to mismatching reference allele.
*/
private void tryToAddVariant(final VariantContext vc, final ReferenceSequence refSeq, final VariantContext source) {
if (!refSeq.getName().equals(vc.getContig())) {
......@@ -514,6 +520,7 @@ public class LiftoverVcf extends CommandLineProgram {
rejects.add(new VariantContextBuilder(source)
.filter(FILTER_MISMATCHING_REF_ALLELE)
.attribute(ATTEMPTED_LOCUS, String.format("%s:%d-%d", vc.getContig(), vc.getStart(), vc.getEnd()))
.attribute(ATTEMPTED_ALLELES, vc.getReference().toString() + "->" + String.join(",", vc.getAlternateAlleles().stream().map(Allele::toString).collect(Collectors.toList())))
.make());
failedAlleleCheck++;
trackLiftedVariantContig(rejectsByContig, source.getContig());
......
package picard.sam;
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Tests for the AddOATag command line program: verifies that the OA (original alignment)
 * tag is written as expected, both with and without an interval list restricting which
 * records are tagged.
 */
public class AddOATagTest {
    private static final Path TEST_DIR = Paths.get("testdata/picard/sam/");
    private static final Path OA_TEST_DIR = TEST_DIR.resolve("AddOATag/");

    @DataProvider(name="testOAData")
    Object[][] testOAData() {
        return new Object[][]{
                new Object[]{OA_TEST_DIR.resolve("aligned_withoutOA.sam").toFile(), OA_TEST_DIR.resolve("aligned_withOA.sam").toFile()},
        };
    }

    @DataProvider(name="testIntervalListData")
    Object[][] testIntervalListData() {
        return new Object[][]{
                // no interval list: every record gets tagged
                new Object[]{TEST_DIR.resolve("aligned.sam").toFile(), null, OA_TEST_DIR.resolve("aligned_no_intervals.sam").toFile()},
                // interval list with no overlapping records: output equals input
                new Object[]{TEST_DIR.resolve("aligned.sam").toFile(), new File("testdata/picard/analysis/directed/CollectHsMetrics/chrM.interval_list"), TEST_DIR.resolve("aligned.sam").toFile()},
                // interval list overlapping some records: only those get tagged
                new Object[]{TEST_DIR.resolve("aligned.sam").toFile(), TEST_DIR.resolve("contiguous.interval_list").toFile(), OA_TEST_DIR.resolve("aligned_with_intervals.sam").toFile()}
        };
    }

    @Test(dataProvider = "testOAData")
    public void testWritingOATag(final File testSam, final File truthSam) throws IOException {
        final File clpOutput = File.createTempFile("AddOATag", ".bam");
        clpOutput.deleteOnExit();

        runAddOATag(testSam, clpOutput, null);
        validateOATag(clpOutput, truthSam);
    }

    @Test(dataProvider = "testIntervalListData")
    public void testIntervalList(final File inputSam, final File intervalList, final File truthSam) throws IOException {
        final File clpOutput = File.createTempFile("AddOATag", ".bam");
        clpOutput.deleteOnExit();

        runAddOATag(inputSam, clpOutput, intervalList);
        validateOATag(clpOutput, truthSam);
    }

    /**
     * Runs the AddOATag CLP on inputSam, writing to output; intervalList may be null
     * to tag all records. Asserts the program exits successfully.
     */
    private static void runAddOATag(final File inputSam, final File output, final File intervalList) throws IOException {
        final List<String> args = new ArrayList<>(Arrays.asList(
                "INPUT=" + inputSam,
                "OUTPUT=" + output
        ));
        if (intervalList != null) {
            args.add("INTERVAL_LIST=" + intervalList);
        }
        final AddOATag addOATag = new AddOATag();
        Assert.assertEquals(addOATag.instanceMain(args.toArray(new String[0])), 0, "Running addOATag did not succeed");
    }

    /**
     * Asserts that the per-record OA tag values in testSam match those in truthSam,
     * in order.
     */
    private static void validateOATag(final File testSam, final File truthSam) throws IOException {
        Assert.assertEquals(readOATagValues(testSam), readOATagValues(truthSam));
    }

    /**
     * Collects the OA tag value (possibly null) of every record in the given SAM file.
     * The reader is closed via try-with-resources; the original code leaked both readers.
     */
    private static List<String> readOATagValues(final File sam) throws IOException {
        final List<String> oaValues = new ArrayList<>();
        try (final SamReader reader = SamReaderFactory.makeDefault().open(sam)) {
            for (final SAMRecord rec : reader) {
                oaValues.add(rec.getStringAttribute(AddOATag.OA));
            }
        }
        return oaValues;
    }
}
......@@ -98,6 +98,11 @@ public class CompareSAMsTest extends CommandLineProgramTest {
testHelper("genomic_sorted.sam", "genomic_sorted.sam", 2, 0, 0, 0, 0, 0, 0, true);
}
@Test
public void testIdenticalExceptHeaderVersion() {
testHelper("genomic_sorted.sam", "genomic_sorted_sam_v1.6.sam", 2, 0, 0, 0, 0, 0, 0, false);
}
@Test
public void testHasNonPrimary() {
testHelper("genomic_sorted.sam", "has_non_primary.sam", 2, 0, 0, 0, 0, 0, 0, true);
......
......@@ -21,8 +21,10 @@ public class ValidateSamFileTest extends CommandLineProgramTest {
return new Object[][] {
{"nofile", ValidateSamFile.ReturnTypes.FAILED.value()},
{"good/sorted-pair.sam", ValidateSamFile.ReturnTypes.SUCCESSFUL.value()},
{"good/sorted-pair-v1.6.sam", ValidateSamFile.ReturnTypes.SUCCESSFUL.value()},
{"bad/unpaired-mate.sam", ValidateSamFile.ReturnTypes.ERRORS.value()},
{"bad/missing-rg-info.sam", ValidateSamFile.ReturnTypes.ERRORS_WARNINGS.value()},
{"bad/missing-rg-info-v1.6.sam", ValidateSamFile.ReturnTypes.ERRORS_WARNINGS.value()},
{"bad/sorted-pair-missing-rg.sam", ValidateSamFile.ReturnTypes.WARNINGS.value()}
};
}
......
......@@ -32,7 +32,6 @@ import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
......@@ -43,6 +42,7 @@ import picard.sam.testers.SamFileTester;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
......@@ -143,44 +143,47 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
}
@Override
public void test() {
public void test() throws IOException {
testMetrics();
}
/**
* Runs test and returns metrics
* @return Duplication metrics
* @throws IOException
*/
public MetricsFile<DuplicationMetrics, Double> testMetrics() throws IOException {
try {
updateExpectedDuplicationMetrics();
// Read the output and check the duplicate flag
int outputRecords = 0;
final Set<String> sequencingDTErrorsSeen = new HashSet<>();
final SamReader reader = SamReaderFactory.makeDefault().open(getOutput());
try(final SamReader reader = SamReaderFactory.makeDefault().open(getOutput())) {
for (final SAMRecord record : reader) {
outputRecords++;
final String key = samRecordToDuplicatesFlagsKey(record);
if (!this.duplicateFlags.containsKey(key)) {
System.err.println("DOES NOT CONTAIN KEY: " + key);
}
Assert.assertTrue(this.duplicateFlags.containsKey(key));
final boolean value = this.duplicateFlags.get(key);
this.duplicateFlags.remove(key);
if (value != record.getDuplicateReadFlag()) {
System.err.println("Mismatching read:");
System.err.print(record.getSAMString());
}
Assert.assertEquals(record.getDuplicateReadFlag(), value);
Assert.assertEquals(record.getDuplicateReadFlag(), value, "Mismatching read: " + record.getSAMString());
if (testOpticalDuplicateDTTag && MarkDuplicates.DUPLICATE_TYPE_SEQUENCING.equals(record.getAttribute("DT"))) {
sequencingDTErrorsSeen.add(record.getReadName());
}
}
CloserUtil.close(reader);
}
// Ensure the program output the same number of records as were read in
Assert.assertEquals(outputRecords, this.getNumberOfRecords(), ("saw " + outputRecords + " output records, vs. " + this.getNumberOfRecords() + " input records"));
// Check the values written to metrics.txt against our input expectations
final MetricsFile<DuplicationMetrics, Comparable<?>> metricsOutput = new MetricsFile<DuplicationMetrics, Comparable<?>>();
final MetricsFile<DuplicationMetrics, Double> metricsOutput = new MetricsFile<>();
try{
metricsOutput.read(new FileReader(metricsFile));
}
catch (final FileNotFoundException ex) {
System.err.println("Metrics file not found: " + ex);
Assert.fail("Metrics file not found: " + ex.getMessage());
}
Assert.assertEquals(metricsOutput.getMetrics().size(), 1);
final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(0);
......@@ -197,6 +200,7 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
Assert.assertEquals(sequencingDTErrorsSeen.size(), expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match duplicate groups observed in the file");
Assert.assertEquals(sequencingDTErrorsSeen.size(), observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match duplicate groups observed in the file");
}
return metricsOutput;
} finally {
TestUtil.recursiveDelete(getOutputDir());
}
......
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam.markduplicates;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* This class defines the individual test cases to check the duplicate set histograms.
*
* @author hogstrom@broadinstitute.org
*/
public class MarkDuplicatesSetSizeHistogramTest extends AbstractMarkDuplicatesCommandLineProgramTest {

    @Override
    protected MarkDuplicatesSetSizeHistogramTester getTester() {
        return new MarkDuplicatesSetSizeHistogramTester();
    }

    @Test
    public void testSingleSet() {
        // Two read pairs with the same alignment start form one duplicate set of size 2.
        // The pair is an optical duplicate, so a size-2 entry is expected under
        // 'optical_sets' as well as 'all_sets'.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(1);
        final String representativeReadName = "RUNID:1:1:16020:13352";
        tester.addMatePair(representativeReadName, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);

        // Expected counts: key = (histogram label, histogram bin), value = histogram entry.
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "2.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "2.0"), 1.0);
        tester.runTest();
    }

    @Test
    public void testOpticalAndNonOpticalSet() {
        // Two optical duplicates plus one non-optical duplicate in the same set:
        // 'all_sets' sees a set of 4 reads, 'optical_sets' a set of 3.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(2);
        final String representativeReadName = "RUNID:1:1:16020:13352";
        tester.addMatePair(representativeReadName, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15994:13364", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);

        // One non-optical duplicate (far-away tile coordinates).
        tester.addMatePair("RUNID:1:1:25993:23361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);

        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "4.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "3.0"), 1.0);
        tester.runTest();
    }

    @Test
    public void testSingleton() {
        // A single non-duplicated read pair should produce a size-1 'all_sets' entry
        // and no optical entry.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(0);

        tester.addMatePair("RUNID:1:1:15993:13375", 1, 485255, 485255, false, false, false, false, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "1.0"), 1.0);
        tester.runTest();
    }

    @Test
    public void testMultiRepresentativeReadTags() {
        // Multiple duplicate sets of differing sizes plus a singleton, all in one run.
        final MarkDuplicatesSetSizeHistogramTester tester = getTester();
        tester.setExpectedOpticalDuplicate(3);

        // Duplicate set: size 2 - all optical.
        final String representativeReadName1 = "RUNID:1:1:16020:13352";
        tester.addMatePair(representativeReadName1, 1, 485253, 485253, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13361", 1, 485253, 485253, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "3.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "3.0"), 1.0);

        // Duplicate set: size 3 - all optical.
        final String representativeReadName2 = "RUNID:1:1:15993:13360";
        tester.addMatePair(representativeReadName2, 1, 485299, 485299, false, false, false, false, "45M", "45M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13365", 1, 485299, 485299, false, false, true, true, "44M1S", "44M1S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.addMatePair("RUNID:1:1:15993:13370", 1, 485299, 485299, false, false, true, true, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "4.0"), 1.0);
        tester.expectedSetSizeMap.put(Arrays.asList("optical_sets", "4.0"), 1.0);

        // Singleton (non-duplicate) read pair.
        tester.addMatePair("RUNID:1:1:15993:13375", 1, 485255, 485255, false, false, false, false, "43M2S", "43M2S", false, true, false, false, false, DEFAULT_BASE_QUALITY);
        tester.expectedSetSizeMap.put(Arrays.asList("all_sets", "1.0"), 1.0);
        tester.runTest();
    }
}
/*
* The MIT License
*
* Copyright (c) 2018 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.sam.markduplicates;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import picard.cmdline.CommandLineProgram;
import picard.sam.DuplicationMetrics;
import java.io.IOException;
import java.util.*;
/**
 * An extension of AbstractMarkDuplicatesCommandLineProgramTester that, after running the
 * standard metrics checks, validates the duplicate-set-size histogram entries written by
 * MarkDuplicates against a caller-supplied map of expected (label, bin) values.
 * (The previous javadoc claimed this tested MarkDuplicatesWithMateCigar, but
 * {@link #getProgram()} returns {@code MarkDuplicates}.)
 */
public class MarkDuplicatesSetSizeHistogramTester extends AbstractMarkDuplicatesCommandLineProgramTester {
    // key = (histogram label, histogram bin), value = expected histogram entry
    final public Map<List<String>, Double> expectedSetSizeMap = new HashMap<>();

    @Override
    protected CommandLineProgram getProgram() {
        return new MarkDuplicates();
    }

    @Override
    public void test() throws IOException {
        final MetricsFile<DuplicationMetrics, Double> metricsOutput = testMetrics();

        // Check each expected (label, bin) pair against the histograms actually
        // written to the metrics file. Fail if none of the expectations matched,
        // which would mean the expected histogram/bin never appeared at all.
        if (!expectedSetSizeMap.isEmpty()) {
            boolean checked = false;
            for (final Histogram<Double> histo : metricsOutput.getAllHistograms()) {
                final String label = histo.getValueLabel();
                for (final Double bin : histo.keySet()) {
                    final String binStr = String.valueOf(bin);
                    final List<String> labelBinStr = Arrays.asList(label, binStr);
                    if (expectedSetSizeMap.containsKey(labelBinStr)) {
                        checked = true;
                        final Histogram.Bin<Double> binValue = histo.get(bin);
                        final double actual = binValue.getValue();
                        final double expected = expectedSetSizeMap.get(labelBinStr);
                        Assert.assertEquals(actual, expected);
                    }
                }
            }
            if (!checked) {
                // Fixed message: was "Could not not find matching entry ..."
                Assert.fail("Could not find a matching entry for expectedSetSizeMap in metrics.");
            }
        }
    }

    @AfterClass
    public void afterTest() {
        // Clean up the test's output directory once all tests in the class have run.
        TestUtil.recursiveDelete(getOutputDir());
    }
}