Skip to content
Commits on Source (5)
picard-tools (2.18.23+dfsg-1) UNRELEASED; urgency=medium
* New upstream version
* debhelper 12
* Standards-Version: 4.3.0
-- Andreas Tille <tille@debian.org> Fri, 11 Jan 2019 22:52:33 +0100
picard-tools (2.18.17+dfsg-1) unstable; urgency=medium
[ Emmanuel Bourg ]
......
......@@ -8,7 +8,7 @@ Uploaders: Charles Plessy <plessy@debian.org>,
Section: science
Priority: optional
Build-Depends: default-jdk (>= 2:1.9~),
debhelper (>= 11~),
debhelper (>= 12~),
javahelper,
gradle-debian-helper,
maven-repo-helper,
......@@ -28,7 +28,7 @@ Build-Depends: default-jdk (>= 2:1.9~),
libhtsjdk-java-doc,
libguava-java-doc,
libjs-jquery
Standards-Version: 4.2.1
Standards-Version: 4.3.0
Vcs-Browser: https://salsa.debian.org/med-team/picard-tools
Vcs-Git: https://salsa.debian.org/med-team/picard-tools.git
Homepage: http://broadinstitute.github.io/picard/
......
......@@ -167,8 +167,7 @@ public class RnaSeqMetricsCollector extends SAMRecordMultiLevelCollector<RnaSeqM
}
if (intersectionLength/(double)fragmentInterval.length() >= rrnaFragmentPercentage) {
// Assume entire read is ribosomal.
// TODO: Should count reads, not bases?
metrics.RIBOSOMAL_BASES += rec.getReadLength();
metrics.RIBOSOMAL_BASES += getNumAlignedBases(rec);
metrics.PF_ALIGNED_BASES += getNumAlignedBases(rec);
return;
}
......
......@@ -281,7 +281,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
FIRST_TILE, TILE_LIMIT, queryNameComparator,
new FastqRecordsForClusterCodec(readStructure.templates.length(),
readStructure.sampleBarcodes.length(), readStructure.molecularBarcode.length()),
FastqRecordsForCluster.class, bclQualityEvaluationStrategy, INCLUDE_NON_PF_READS, IGNORE_UNEXPECTED_BARCODES);
FastqRecordsForCluster.class, bclQualityEvaluationStrategy, IGNORE_UNEXPECTED_BARCODES);
} else {
basecallsConverter = new IlluminaBasecallsConverter<>(BASECALLS_DIR, BARCODES_DIR, LANE, readStructure,
sampleBarcodeFastqWriterMap, demultiplex, Math.max(1, MAX_READS_IN_RAM_PER_TILE / readsPerCluster), TMP_DIR, NUM_PROCESSORS,
......
......@@ -312,7 +312,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
TMP_DIR, NUM_PROCESSORS,
FIRST_TILE, TILE_LIMIT, new QueryNameComparator(),
new Codec(numOutputRecords),
SAMRecordsForCluster.class, bclQualityEvaluationStrategy, INCLUDE_NON_PF_READS, IGNORE_UNEXPECTED_BARCODES);
SAMRecordsForCluster.class, bclQualityEvaluationStrategy, IGNORE_UNEXPECTED_BARCODES);
} else {
basecallsConverter = new IlluminaBasecallsConverter<>(BASECALLS_DIR, BARCODES_DIR, LANE, readStructure,
barcodeSamWriterMap, true, MAX_READS_IN_RAM_PER_TILE / numOutputRecords, TMP_DIR, NUM_PROCESSORS, FORCE_GC,
......
......@@ -14,9 +14,11 @@ import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.readers.AbstractIlluminaPositionFileReader;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
import picard.illumina.parser.readers.LocsFileReader;
import picard.util.ThreadPoolExecutorUtil;
import picard.util.ThreadPoolExecutorWithExceptions;
import java.io.File;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
......@@ -24,9 +26,7 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
......@@ -36,12 +36,10 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
private final List<AbstractIlluminaPositionFileReader.PositionInfo> locs = new ArrayList<>();
private final File[] filterFiles;
private final Map<String, ThreadPoolExecutorWithExceptions> barcodeWriterThreads = new HashMap<>();
private final Map<Integer, List<RecordWriter>> completedWork = Collections.synchronizedMap(new HashMap<>());
private final Map<Integer, File> barcodesFiles = new HashMap<>();
private final boolean includeNonPfReads;
/**
* Constructor setting includeNonPfReads to true by default.
*
* @param basecallsDir Where to read basecalls from.
* @param barcodesDir Where to read barcodes from (optional; use basecallsDir if not specified).
* @param lane What lane to process.
......@@ -73,52 +71,11 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
final Class<CLUSTER_OUTPUT_RECORD> outputRecordClass,
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy,
final boolean ignoreUnexpectedBarcodes) {
this(basecallsDir, barcodesDir, lane, readStructure, barcodeRecordWriterMap,
demultiplex, maxReadsInRamPerTile, tmpDirs, numProcessors, firstTile,
tileLimit, outputRecordComparator, codecPrototype, outputRecordClass,
bclQualityEvaluationStrategy, true, ignoreUnexpectedBarcodes);
}
/**
* @param basecallsDir Where to read basecalls from.
* @param barcodesDir Where to read barcodes from (optional; use basecallsDir if not specified).
* @param lane What lane to process.
* @param readStructure How to interpret each cluster.
* @param barcodeRecordWriterMap Map from barcode to CLUSTER_OUTPUT_RECORD writer. If demultiplex is false, must contain
* one writer stored with key=null.
* @param demultiplex If true, output is split by barcode, otherwise all are written to the same output stream.
* @param maxReadsInRamPerTile Configures number of reads each tile will store in RAM before spilling to disk.
* @param tmpDirs For SortingCollection spilling.
* @param numProcessors Controls number of threads. If <= 0, the number of threads allocated is
* available cores - numProcessors.
* @param firstTile (For debugging) If non-null, start processing at this tile.
* @param tileLimit (For debugging) If non-null, process no more than this many tiles.
* @param outputRecordComparator For sorting output records within a single tile.
* @param codecPrototype For spilling output records to disk.
* @param outputRecordClass Inconveniently needed to create SortingCollections.
* @param includeNonPfReads If true, will include ALL reads (including those which do not have PF set)
* @param ignoreUnexpectedBarcodes If true, will ignore reads whose called barcode is not found in barcodeRecordWriterMap,
*/
public NewIlluminaBasecallsConverter(final File basecallsDir, final File barcodesDir, final int lane,
final ReadStructure readStructure,
final Map<String, ? extends ConvertedClusterDataWriter<CLUSTER_OUTPUT_RECORD>> barcodeRecordWriterMap,
final boolean demultiplex,
final int maxReadsInRamPerTile,
final List<File> tmpDirs, final int numProcessors,
final Integer firstTile,
final Integer tileLimit,
final Comparator<CLUSTER_OUTPUT_RECORD> outputRecordComparator,
final SortingCollection.Codec<CLUSTER_OUTPUT_RECORD> codecPrototype,
final Class<CLUSTER_OUTPUT_RECORD> outputRecordClass,
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy,
final boolean includeNonPfReads,
final boolean ignoreUnexpectedBarcodes) {
super(barcodeRecordWriterMap, maxReadsInRamPerTile, tmpDirs, codecPrototype, ignoreUnexpectedBarcodes,
demultiplex, outputRecordComparator, bclQualityEvaluationStrategy,
outputRecordClass, numProcessors, new IlluminaDataProviderFactory(basecallsDir,
barcodesDir, lane, readStructure, bclQualityEvaluationStrategy));
this.includeNonPfReads = includeNonPfReads;
this.tiles = new ArrayList<>();
barcodeRecordWriterMap.keySet().forEach(barcode -> barcodeWriterThreads.put(barcode, new ThreadPoolExecutorWithExceptions(1)));
......@@ -196,31 +153,26 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
completedWorkExecutor.shutdown();
//thread by surface tile
final ThreadPoolExecutor tileProcessingExecutor = new ThreadPoolExecutorWithExceptions(numThreads);
workChecker.setTileProcessingExecutor(tileProcessingExecutor);
final ThreadPoolExecutorWithExceptions tileProcessingExecutor = new ThreadPoolExecutorWithExceptions(numThreads);
for (final Integer tile : tiles) {
tileProcessingExecutor.submit(new TileProcessor(tile, barcodesFiles.get(tile), workChecker));
tileProcessingExecutor.submit(new TileProcessor(tile, barcodesFiles.get(tile)));
}
tileProcessingExecutor.shutdown();
awaitThreadPoolTermination("Reading executor", tileProcessingExecutor);
awaitThreadPoolTermination("Tile completion executor", completedWorkExecutor);
ThreadPoolExecutorUtil.awaitThreadPoolTermination("Reading executor", tileProcessingExecutor, Duration.ofMinutes(5));
// if there was an exception reading then initiate an immediate shutdown.
if (tileProcessingExecutor.exception != null) {
int tasksStillRunning = completedWorkExecutor.shutdownNow().size();
tasksStillRunning += barcodeWriterThreads.values().stream().mapToLong(executor -> executor.shutdownNow().size()).sum();
throw new PicardException("Reading executor had exceptions. There were " + tasksStillRunning
+ " tasks were still running or queued and have been cancelled.", tileProcessingExecutor.exception);
} else {
ThreadPoolExecutorUtil.awaitThreadPoolTermination("Tile completion executor", completedWorkExecutor, Duration.ofMinutes(5));
barcodeWriterThreads.values().forEach(ThreadPoolExecutor::shutdown);
barcodeWriterThreads.forEach((barcode, executor) -> awaitThreadPoolTermination(barcode + " writer", executor));
}
private void awaitThreadPoolTermination(final String executorName, final ThreadPoolExecutor executorService) {
try {
while (!executorService.awaitTermination(1, TimeUnit.SECONDS)) {
log.info(String.format("%s waiting for job completion. Finished jobs - %d : Running jobs - %d : Queued jobs - %d",
executorName, executorService.getCompletedTaskCount(), executorService.getActiveCount(),
executorService.getQueue().size()));
}
} catch (final InterruptedException e) {
e.printStackTrace();
barcodeWriterThreads.forEach((barcode, executor) -> ThreadPoolExecutorUtil.awaitThreadPoolTermination(barcode + " writer", executor, Duration.ofMinutes(5)));
}
}
......@@ -249,16 +201,30 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
}
}
private class Closer implements Runnable {
private final ConvertedClusterDataWriter<CLUSTER_OUTPUT_RECORD> writer;
private final String barcode;
private Closer(final ConvertedClusterDataWriter<CLUSTER_OUTPUT_RECORD> writer, final String barcode) {
this.writer = writer;
this.barcode = barcode;
}
@Override
public void run() {
log.debug("Closing writer for barcode " + barcode);
this.writer.close();
}
}
private class TileProcessor implements Runnable {
private final int tileNum;
private final Map<String, SortingCollection<CLUSTER_OUTPUT_RECORD>> barcodeToRecordCollection = new HashMap<>();
private final File barcodeFile;
private final CompletedWorkChecker workChecker;
TileProcessor(final int tileNum, final File barcodeFile, CompletedWorkChecker workChecker) {
TileProcessor(final int tileNum, final File barcodeFile) {
this.tileNum = tileNum;
this.barcodeFile = barcodeFile;
this.workChecker = workChecker;
}
@Override
......@@ -268,13 +234,9 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
while (dataProvider.hasNext()) {
final ClusterData cluster = dataProvider.next();
readProgressLogger.record(null, 0);
// If this cluster is passing, or we do NOT want to ONLY emit passing reads, then add it to
// the next
if (cluster.isPf() || includeNonPfReads) {
final String barcode = (demultiplex ? cluster.getMatchedBarcode() : null);
addRecord(barcode, converter.convertClusterToOutputRecord(cluster));
}
}
dataProvider.close();
......@@ -285,8 +247,7 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
writerList.add(new RecordWriter(writer, value, barcode));
});
workChecker.registerCompletedWork(tileNum, writerList);
completedWork.put(tileNum, writerList);
log.info("Finished processing tile " + tileNum);
}
......@@ -314,21 +275,19 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
final int maxRecordsInRam =
Math.max(1, maxReadsInRamPerTile /
barcodeRecordWriterMap.size());
return SortingCollection.newInstance(
return SortingCollection.newInstanceFromPaths(
outputRecordClass,
codecPrototype.clone(),
outputRecordComparator,
maxRecordsInRam,
tmpDirs);
IOUtil.filesToPaths(tmpDirs));
}
}
private class CompletedWorkChecker implements Runnable {
private final Map<Integer, List<RecordWriter>> completedWork = new ConcurrentHashMap<>();
private int currentTileIndex = 0;
private ThreadPoolExecutor tileProcessingExecutor;
@Override
public void run() {
......@@ -337,30 +296,19 @@ public class NewIlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> extends Baseca
if (completedWork.containsKey(currentTile)) {
log.info("Writing out tile " + currentTile);
completedWork.get(currentTile).forEach(writer -> barcodeWriterThreads.get(writer.getBarcode()).submit(writer));
completedWork.remove(currentTile);
currentTileIndex++;
} else {
try {
Thread.sleep(5000);
} catch (final InterruptedException e) {
throw new PicardException(e.getMessage(), e);
}
if(tileProcessingExecutor.isTerminated() && completedWork.size() == 0){
break;
}
}
//we are all done scheduling work.. now schedule the closes
barcodeRecordWriterMap.forEach((barcode, writer) -> {
ThreadPoolExecutorWithExceptions executorService = barcodeWriterThreads.get(barcode);
executorService.shutdown();
awaitThreadPoolTermination( "Barcode writer (" + barcode + ")", executorService);
log.debug("Closing writer for barcode " + barcode);
writer.close();
});
barcodeRecordWriterMap.forEach((barcode, writer) -> barcodeWriterThreads.get(barcode).submit(new Closer(writer, barcode)));
}
void registerCompletedWork(int tileNum, List<RecordWriter> writerList) {
completedWork.put(tileNum, writerList);
}
void setTileProcessingExecutor(ThreadPoolExecutor tileProcessingExecutor) {
this.tileProcessingExecutor = tileProcessingExecutor;
}
}
}
......@@ -42,7 +42,7 @@ public abstract class BaseIlluminaDataProvider implements Iterator<ClusterData>,
}
protected void addData(final ClusterData clusterData, final PfData pfData) {
clusterData.setPf(pfData.isPfRead());
clusterData.setPf(pfData.isPf());
}
protected void addData(final ClusterData clusterData, final BarcodeData barcodeData) {
......@@ -72,9 +72,9 @@ public abstract class BaseIlluminaDataProvider implements Iterator<ClusterData>,
for (int i = 0; i < numReads; i++) {
clusterData.getRead(i).setQualities(qualities[i]);
}
clusterData.setPf(cbclData.isPfRead());
clusterData.setX(cbclData.getXCoordinate());
clusterData.setY(cbclData.getYCoordinate());
clusterData.setPf(cbclData.isPf());
clusterData.setX(cbclData.getPositionInfo().xQseqCoord);
clusterData.setY(cbclData.getPositionInfo().yQseqCoord);
}
abstract void seekToTile(int seekAfterFirstRead);
......
......@@ -9,27 +9,30 @@ import picard.illumina.parser.readers.AbstractIlluminaPositionFileReader;
public class CbclData extends BclData implements PfData, PositionalData {
private final int tile;
private AbstractIlluminaPositionFileReader.PositionInfo positionInfo;
private boolean pfRead;
public CbclData(int[] outputLengths, int tile) {
super(outputLengths);
this.tile = tile;
}
//CBCLs currently only contain PF reads.
@Override
public boolean isPf() {
return true;
}
public int getTile() {
return tile;
}
public void setPositionInfo(AbstractIlluminaPositionFileReader.PositionInfo positionInfo) {
this.positionInfo = positionInfo;
public AbstractIlluminaPositionFileReader.PositionInfo getPositionInfo() {
return positionInfo;
}
public void setPfRead(boolean pfRead) {
this.pfRead = pfRead;
public void setPositionInfo(AbstractIlluminaPositionFileReader.PositionInfo positionInfo) {
this.positionInfo = positionInfo;
}
@Override
public int getXCoordinate() {
return this.positionInfo.xQseqCoord;
......@@ -39,9 +42,4 @@ public class CbclData extends BclData implements PfData, PositionalData {
public int getYCoordinate() {
return this.positionInfo.yQseqCoord;
}
@Override
public boolean isPfRead() {
return pfRead;
}
}
......@@ -65,7 +65,7 @@ class FilterParser extends PerTileParser<PfData> {
public PfData next() {
final boolean nextValue = reader.next();
return new PfData() {
public boolean isPfRead() {
public boolean isPf() {
return nextValue;
}
};
......
......@@ -59,7 +59,7 @@ interface RawIntensityData extends IlluminaData{
}
interface PfData extends IlluminaData {
public boolean isPfRead();
public boolean isPf();
}
interface BarcodeData extends IlluminaData {
......
......@@ -46,7 +46,7 @@ public class MultiTileFilterParser extends MultiTileParser<PfData> {
final boolean nextVal = reader.next();
return new PfData() {
@Override
public boolean isPfRead() {
public boolean isPf() {
return nextVal;
}
};
......
......@@ -200,13 +200,13 @@ public class CbclReader extends BaseBclReader implements CloseableIterator<CbclD
for (final int outputLength : outputLengths) {
for (int cycle = 0; cycle < outputLength; cycle++) {
final TileData currentCycleTileInfo = cycleData[totalCycleCount].tileInfo;
final CycleData currentCycleData = cycleData[totalCycleCount];
try {
if (cachedTile[totalCycleCount] == null) {
if (!cachedFilter.containsKey(currentCycleTileInfo.tileNum)) {
cacheFilterAndLocs(currentCycleTileInfo, locs);
if (!cachedFilter.containsKey(cycleData[totalCycleCount].tileInfo.tileNum)) {
cacheFilterAndLocs(cycleData[totalCycleCount].tileInfo, locs);
}
cacheTile(totalCycleCount, currentCycleTileInfo);
cacheTile(totalCycleCount, cycleData[totalCycleCount].tileInfo, currentCycleData);
}
} catch (final IOException e) {
// when logging the error, increment cycle by 1, since totalCycleCount is zero-indexed but Illumina directories are 1-indexed.
......@@ -269,9 +269,7 @@ public class CbclReader extends BaseBclReader implements CloseableIterator<CbclD
private void advance() {
int totalCycleCount = 0;
int tileNum = cycleData[totalCycleCount].tileInfo.tileNum;
final CbclData data = new CbclData(outputLengths, tileNum);
int tilePosition = cachedTilePosition[totalCycleCount];
final CbclData data = new CbclData(outputLengths, cycleData[totalCycleCount].tileInfo.tileNum);
for (int read = 0; read < outputLengths.length; read++) {
for (int cycle = 0; cycle < outputLengths[read]; cycle++) {
......@@ -290,25 +288,31 @@ public class CbclReader extends BaseBclReader implements CloseableIterator<CbclD
totalCycleCount++;
}
}
data.setPositionInfo(positionInfoIterator.next());
data.setPfRead(cachedFilter.get(tileNum).get(tilePosition));
this.queue = data;
}
private void cacheFilterAndLocs(final TileData currentTileData, final List<AbstractIlluminaPositionFileReader.PositionInfo> locs) {
final List<Boolean> filterValues = new ArrayList<>();
final FilterFileReader reader = new FilterFileReader(filterFileMap.get(currentTileData.tileNum));
final Iterator<AbstractIlluminaPositionFileReader.PositionInfo> positionInfoIterator = locs.iterator();
while (reader.hasNext()) {
filterValues.add(reader.next());
}
this.positionInfoIterator = locs.iterator();
final List<AbstractIlluminaPositionFileReader.PositionInfo> positions = new ArrayList<>();
for (final boolean filterValue : filterValues) {
final AbstractIlluminaPositionFileReader.PositionInfo info = positionInfoIterator.next();
if (filterValue) {
positions.add(info);
}
}
this.positionInfoIterator = positions.iterator();
cachedFilter.put(currentTileData.tileNum, filterValues);
}
private void cacheTile(final int totalCycleCount, final TileData tileData) throws IOException {
private void cacheTile(final int totalCycleCount, final TileData tileData, final CycleData currentCycleData) throws IOException {
final byte[] tileByteArray = new byte[tileData.compressedBlockSize];
// Read the whole compressed block into a buffer, then sanity check the length
......@@ -329,11 +333,38 @@ public class CbclReader extends BaseBclReader implements CloseableIterator<CbclD
byte[] decompressedByteArray = decompressTile(totalCycleCount, tileData, byteInputStream);
// Read uncompressed data from the buffer and expand each nibble into a full byte for ease of use
cachedTile[totalCycleCount] = promoteNibblesToBytes(decompressedByteArray);
byte[] unNibbledByteArray = promoteNibblesToBytes(decompressedByteArray);
cachedTile[totalCycleCount] = filterNonPfReads(tileData, currentCycleData, unNibbledByteArray);
cachedTilePosition[totalCycleCount] = 0;
}
private byte[] filterNonPfReads(TileData tileData, CycleData currentCycleData, byte[] unNibbledByteArray) {
// Write buffer contents to cached tile array
// if nonPF reads are included we need to strip them out
if (!currentCycleData.pfExcluded) {
final List<Boolean> filterDatas = cachedFilter.get(tileData.tileNum);
int sum = 0;
for (final boolean b : filterDatas) {
sum += b ? 1 : 0;
}
final byte[] filteredByteArray = new byte[sum];
int filterIndex = 0;
int basecallIndex = 0;
for (final boolean filterData : filterDatas) {
final byte readByte = unNibbledByteArray[filterIndex];
if (filterData) {
filteredByteArray[basecallIndex] = readByte;
basecallIndex++;
}
filterIndex++;
}
return filteredByteArray;
} else {
return unNibbledByteArray;
}
}
private byte[] promoteNibblesToBytes(byte[] decompressedByteArray) {
//we are going to explode the nibbles in to bytes to make PF filtering easier
final byte[] unNibbledByteArray = new byte[decompressedByteArray.length * 2];
......@@ -376,6 +407,10 @@ public class CbclReader extends BaseBclReader implements CloseableIterator<CbclD
return decompressedByteArray;
}
public CycleData[] getCycleData() {
return cycleData;
}
public int getHeaderSize() {
return headerSize;
}
......
......@@ -82,34 +82,49 @@ class UmiUtil {
throw new PicardException("Duplex UMIs must be of the form X-Y where X and Y are equal length UMIs, for example AT-GA. Found UMI, " + umi);
}
if (isTopStrand(record)) {
return split[0] + DUPLEX_UMI_DELIMITER + split[1];
} else {
switch (getStrand(record)) {
case BOTTOM:
return split[1] + DUPLEX_UMI_DELIMITER + split[0];
default:
return umi;
}
}
/**
* Determines if the read represented by a SAM record belongs to the top or bottom strand.
* Determines if the read represented by a SAM record belongs to the top or bottom strand
* or if it cannot determine strand position due to one of the reads being unmapped.
* Top strand is defined as having a read 1 unclipped 5' coordinate
* less than the read 2 unclipped 5' coordinate. If a read is unmapped
* it is considered to have an unclipped 5' coordinate of 0. If the mate
* belongs to a different contig from the read, then the reference
* we do not attempt to determine the strand to which the read or its mate belongs.
* If the mate belongs to a different contig from the read, then the reference
* index for the read and its mate is used in leu of the unclipped 5' coordinate.
* @param rec Record to determine top or bottom strand
* @return Top or bottom strand, true (top), false (bottom).
* @return Top or bottom strand, unknown if it cannot be determined.
*/
static boolean isTopStrand(final SAMRecord rec) {
static ReadStrand getStrand(final SAMRecord rec) {
if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) {
return ReadStrand.UNKNOWN;
}
// If the read pair are aligned to different contigs we use
// the reference index to determine relative 5' coordinate ordering.
// Both the read and its mate should not have their unmapped flag set to true.
if (!rec.getReferenceIndex().equals(rec.getMateReferenceIndex())) {
return rec.getFirstOfPairFlag() == (rec.getReferenceIndex() < rec.getMateReferenceIndex());
if (rec.getFirstOfPairFlag() == (rec.getReferenceIndex() < rec.getMateReferenceIndex())) {
return ReadStrand.TOP;
} else {
return ReadStrand.BOTTOM;
}
}
final int read5PrimeStart = (rec.getReadNegativeStrandFlag()) ? rec.getUnclippedEnd() : rec.getUnclippedStart();
final int mate5PrimeStart = (rec.getMateNegativeStrandFlag()) ? SAMUtils.getMateUnclippedEnd(rec) : SAMUtils.getMateUnclippedStart(rec);
return rec.getFirstOfPairFlag() == (read5PrimeStart < mate5PrimeStart);
if (rec.getFirstOfPairFlag() == (read5PrimeStart <= mate5PrimeStart)) {
return ReadStrand.TOP;
} else {
return ReadStrand.BOTTOM;
}
}
/**
......@@ -133,7 +148,22 @@ class UmiUtil {
molecularIdentifier.append(assignedUmi);
if (duplexUmis) {
molecularIdentifier.append(isTopStrand(rec) ? TOP_STRAND_DUPLEX : BOTTOM_STRAND_DUPLEX);
// Reads whose strand position can be determined will have their
// molecularIdentifier set to include an identifier appended that
// indicates top or bottom strand.
ReadStrand strand = getStrand(rec);
switch (strand) {
case TOP:
molecularIdentifier.append(TOP_STRAND_DUPLEX);
break;
case BOTTOM:
molecularIdentifier.append(BOTTOM_STRAND_DUPLEX);
break;
default:
// If we can't determine strand position nothing
// is appended to the molecularIdentifier.
break;
}
}
rec.setAttribute(molecularIdentifierTag, molecularIdentifier.toString());
}
......@@ -146,4 +176,14 @@ class UmiUtil {
static int getUmiLength(final String umi) {
return umi.length() - StringUtils.countMatches(umi, UmiUtil.DUPLEX_UMI_DELIMITER);
}
/**
* An enum to hold the strand position (TOP or BOTTOM) of a read.
*/
public enum ReadStrand {
TOP,
BOTTOM,
UNKNOWN
}
}
......@@ -126,7 +126,7 @@ public class OpticalDuplicateFinder extends ReadNameParser implements Serializab
// If there is only one or zero reads passed in (so there are obviously no optical duplicates),
// or if there are too many reads (so we don't want to try to run this expensive n^2 algorithm),
// then just return an array of all false
if (length < 2 || length > maxDuplicateSetSize) {
if (this.readNameRegex == null || length < 2 || length > maxDuplicateSetSize) {
return opticalDuplicateFlags;
}
......
......@@ -31,7 +31,7 @@ public class ReadNameParser implements Serializable {
private final boolean useOptimizedDefaultParsing; // was the regex default?
private final String readNameRegex;
protected final String readNameRegex;
private Pattern readNamePattern;
......@@ -63,7 +63,7 @@ public class ReadNameParser implements Serializable {
* @param log the log to which to write messages.
*/
public ReadNameParser(final String readNameRegex, final Log log) {
this.useOptimizedDefaultParsing = readNameRegex.equals(DEFAULT_READ_NAME_REGEX);
this.useOptimizedDefaultParsing = DEFAULT_READ_NAME_REGEX.equals(readNameRegex);
this.readNameRegex = readNameRegex;
this.log = log;
}
......
package picard.util;
import htsjdk.samtools.util.Log;
import picard.PicardException;
import java.util.concurrent.CancellationException;
......@@ -15,7 +14,8 @@ import java.util.concurrent.TimeUnit;
* while executing
*/
public class ThreadPoolExecutorWithExceptions extends ThreadPoolExecutor {
private static final Log log = Log.getInstance(ThreadPoolExecutorWithExceptions.class);
public Throwable exception = null;
/**
* Creates a fixed size thread pool executor that will rethrow exceptions from submitted jobs.
*
......@@ -42,6 +42,7 @@ public class ThreadPoolExecutorWithExceptions extends ThreadPoolExecutor {
}
}
if (t != null) {
exception = t;
throw new PicardException(t.getMessage(), t);
}
}
......@@ -50,8 +51,7 @@ public class ThreadPoolExecutorWithExceptions extends ThreadPoolExecutor {
protected void beforeExecute(Thread t, Runnable r) {
super.beforeExecute(t, r);
t.setUncaughtExceptionHandler((t1, e) -> {
log.error(e.getCause());
throw new PicardException("Uncaught exception in thread: " + t1.getName() +" : " + e.getCause().getMessage(), e.getCause());
throw new PicardException("Uncaught exception in thread: " + t1.getName() + " : " + e.getMessage(), e);
});
}
}
......@@ -382,6 +382,59 @@ public class CollectRnaSeqMetricsTest extends CommandLineProgramTest {
Assert.assertEquals(metrics.PCT_R2_TRANSCRIPT_STRAND_READS, 0.333333);
}
@Test
public void testPctRibosomalBases() throws Exception {
final String sequence = "chr1";
final String ignoredSequence = "chrM";
// Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
// Set seed so that strandedness is consistent among runs.
builder.setRandomSeed(0);
final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);
builder.addPair("rrnaPair", sequenceIndex, 400, 500, false, false, "35I1M", "35I1M", true, true, -1);
builder.addFrag("frag1", sequenceIndex, 150, true, false, "34I2M", "*", -1);
builder.addFrag("frag2", sequenceIndex, 190, true, false, "35I1M", "*", -1);
builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
samFile.deleteOnExit();
try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
for (final SAMRecord rec : builder.getRecords()) {
samWriter.addAlignment(rec);
}
}
// Create an interval list with one ribosomal interval.
final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
rRnaIntervalList.add(rRnaInterval);
final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
rRnaIntervalsFile.deleteOnExit();
rRnaIntervalList.write(rRnaIntervalsFile);
// Generate the metrics.
final File metricsFile = File.createTempFile("tmp.", ".rna_metrics");
metricsFile.deleteOnExit();
final String[] args = new String[]{
"INPUT=" + samFile.getAbsolutePath(),
"OUTPUT=" + metricsFile.getAbsolutePath(),
"REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(),
"RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(),
"STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND",
"IGNORE_SEQUENCE=" + ignoredSequence
};
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(metricsFile));
final RnaSeqMetrics metrics = output.getMetrics().get(0);
Assert.assertEquals(metrics.PCT_RIBOSOMAL_BASES, 0.4);
}
public File getRefFlatFile(String sequence) throws Exception {
// Create a refFlat file with a single gene containing two exons, one of which is overlapped by the
// ribosomal interval.
......
......@@ -157,20 +157,20 @@ public class CollectIlluminaBasecallingMetricsTest {
public void testNovaseqIndexedRun() throws Exception {
final MetricsFile<IlluminaBasecallingMetrics, Integer> metricsFile = runIt(1, "151T8B8B151T",
new File("testdata/picard/illumina/151T8B8B151T_cbcl/Data/Intensities/BaseCalls"), null, true);
final IlluminaBasecallingMetrics laneMetric = metricsFile.getMetrics().get(metricsFile.getMetrics().size() - 1);
final IlluminaBasecallingMetrics laneMetric = metricsFile.getMetrics().get(0);
Assert.assertEquals(laneMetric.LANE, "1");
Assert.assertNull(laneMetric.MOLECULAR_BARCODE_SEQUENCE_1);
Assert.assertNull(laneMetric.MOLECULAR_BARCODE_NAME);
Assert.assertEquals(laneMetric.MEAN_CLUSTERS_PER_TILE, 28.0);
Assert.assertEquals(laneMetric.MOLECULAR_BARCODE_SEQUENCE_1, "CACCTAGTACTCGAGT");
Assert.assertEquals(laneMetric.MOLECULAR_BARCODE_NAME, "SA_CACCTAGTACTCGAGT");
Assert.assertEquals(laneMetric.MEAN_CLUSTERS_PER_TILE, 1.0);
Assert.assertEquals(laneMetric.SD_CLUSTERS_PER_TILE, 0.0);
Assert.assertEquals(laneMetric.MEAN_PF_CLUSTERS_PER_TILE,25.0);
Assert.assertEquals(laneMetric.MEAN_PF_CLUSTERS_PER_TILE,1.0);
Assert.assertEquals(laneMetric.SD_PF_CLUSTERS_PER_TILE, 0.0);
Assert.assertEquals(laneMetric.MEAN_PCT_PF_CLUSTERS_PER_TILE, 89.29);
Assert.assertEquals(laneMetric.MEAN_PCT_PF_CLUSTERS_PER_TILE, 100.0);
Assert.assertEquals(laneMetric.SD_PCT_PF_CLUSTERS_PER_TILE, 0.0);
Assert.assertEquals(laneMetric.TOTAL_BASES, 16912);
Assert.assertEquals(laneMetric.TOTAL_READS, 112);
Assert.assertEquals(laneMetric.PF_BASES, 15100);
Assert.assertEquals(laneMetric.PF_READS, 100);
Assert.assertEquals(laneMetric.TOTAL_BASES, 302);
Assert.assertEquals(laneMetric.TOTAL_READS, 2);
Assert.assertEquals(laneMetric.PF_BASES, 302);
Assert.assertEquals(laneMetric.PF_READS, 2);
Assert.assertEquals(metricsFile.getMetrics().size(),6);
......
......@@ -288,8 +288,8 @@ public class ExtractIlluminaBarcodesTest extends CommandLineProgramTest {
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> result = new MetricsFile<>();
result.read(new FileReader(metricsFile));
Assert.assertEquals(result.getMetrics().get(0).PF_PERFECT_MATCHES, 1, "Got wrong number of perfect matches");
Assert.assertEquals(result.getMetrics().get(0).PF_ONE_MISMATCH_MATCHES, 0, "Got wrong number of one-mismatch matches");
Assert.assertEquals(result.getMetrics().get(0).PERFECT_MATCHES, 1, "Got wrong number of perfect matches");
Assert.assertEquals(result.getMetrics().get(0).ONE_MISMATCH_MATCHES, 0, "Got wrong number of one-mismatch matches");
}
/**
......