Skip to content
Commits on Source (6)
......@@ -10,6 +10,11 @@ guarantees will be maintained within each major version series.
## Active
## [0.19.0] - 2018-09-11
### Added
- TranscriptAlignmentSet to XML support
## [0.17.0] - 2018-03-18
### Added
......
......@@ -3,7 +3,7 @@
########################################################################
cmake_policy(SET CMP0048 NEW) # lets us set version in project()
project(PacBioBAM VERSION 0.17.0 LANGUAGES CXX C)
project(PacBioBAM VERSION 0.19.0 LANGUAGES CXX C)
cmake_minimum_required(VERSION 3.0)
# project name & version
......
pbbam (0.19.0+dfsg-1) unstable; urgency=medium
* Team upload.
* New upstream version
* Fix samtools call in tests
* d/control: Bump (fake) SOVERSION (=release version)
-- Andreas Tille <tille@debian.org> Wed, 10 Oct 2018 12:45:02 +0200
pbbam (0.18.0+dfsg-2) unstable; urgency=medium
* Team upload.
......
......@@ -27,7 +27,7 @@ Package: pbbamtools
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
libpbbam0.18.0 (= ${binary:Version})
libpbbam0.19.0 (= ${binary:Version})
Recommends: samtools
Description: processing Pacific Biosciences binary alignment/map files
The BAM format is a binary, compressed, record-oriented container format
......@@ -42,7 +42,7 @@ Description: processing Pacific Biosciences binary alignment/map files
This package provides command-line utilities for working with PacBio BAM
files.
Package: libpbbam0.18.0
Package: libpbbam0.19.0
Architecture: any
Multi-Arch: same
Section: libs
......@@ -66,7 +66,7 @@ Description: Pacific Biosciences binary alignment/map (BAM) library
Package: libpbbam-dev
Architecture: any
Section: libdevel
Depends: libpbbam0.18.0 (= ${binary:Version}),
Depends: libpbbam0.19.0 (= ${binary:Version}),
libhts-dev,
libssl-dev,
${misc:Depends}
......
......@@ -56,6 +56,7 @@ override_dh_auto_clean:
-e 's|@PacBioBAM_VERSION@|$(DEB_VERSION_UPSTREAM)|g' \
-e 's|@GeneratedTestDataDir@|$(generated_data_dir)|g' \
-e 's|+dfsg||g' -e 's|\(/build/$(DEB_SOURCE)-[0-9.]\+\)/|\1+dfsg/|' \
-e 's/$$SAMTOOLS/samtools/g' \
$< > $@
override_dh_install:
......
......@@ -87,14 +87,39 @@ public:
BinCalculation_OFF
};
///
/// \brief The Config struct provides a "parameter object" for BamWriter
/// settings. This allows for writer configuration without having to
/// refer to ordering of parameters, default values, etc.
///
struct Config
{
Config() = default;
// zlib compression level
CompressionLevel compressionLevel = DefaultCompression;
// The number of threads for compression. If set to 0, BamWriter will
// attempt to determine a reasonable estimate. If set to 1, this will
// force single-threaded execution. No checks are made against an upper limit.
size_t numThreads = 4;
// If ON, ensures that proper BAI bin numbers are provided for all records.
BamWriter::BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON;
// If true, write to <filename>.tmp, and rename to <filename> in dtor.
// This allows downstream checks to see if BAM file may be truncated
// due to early termination (e.g. a thrown exception). If false, write
// directly to <filename>.
bool useTempFile = true;
};
public:
/// \name Constructors & Related Methods
/// \{
/// \brief Opens a %BAM file for writing & writes the header information.
///
/// The error status will be set if either operation fails.
///
/// \note Set \p filename to "-" for stdout.
///
/// \param[in] filename path to output %BAM file
......@@ -111,13 +136,32 @@ public:
/// records written. This extra step may turned off when bin
/// numbers are not needed. Though if in doubt, keep the default.
///
/// \param[in] useTempFile If true, write to <filename>.tmp, and rename
/// to <filename>. This provides for downstream
/// checks to see if BAM file may be truncated
/// due to early termination (a thrown exception).
///
/// \throws std::runtmie_error if there was a problem opening the file for
/// writing or if an error occurred while writing the header
///
BamWriter(const std::string& filename, const BamHeader& header,
const BamWriter::CompressionLevel compressionLevel = BamWriter::DefaultCompression,
const size_t numThreads = 4,
const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON);
const BinCalculationMode binCalculationMode = BamWriter::BinCalculation_ON,
const bool useTempFile = true);
///
/// \brief Opens a %BAM file for writing & writes the header information.
///
/// \param[in] filename path to output %BAM file
/// \param[in] header BamHeader object
/// \param[in] config container for add'l configuration options
///
/// \throws std::runtmie_error if there was a problem opening the file for
/// writing or if an error occurred while writing the header
///
BamWriter(const std::string& filename, const BamHeader& header,
const BamWriter::Config& config);
/// Fully flushes all buffered data & closes file.
~BamWriter() override;
......
......@@ -434,6 +434,28 @@ public:
};
/// \}
template <typename T>
static inline bool Check(const T& lhs, const T& rhs, const Compare::Type cmp)
{
switch (cmp) {
case Compare::EQUAL:
return lhs == rhs;
case Compare::LESS_THAN:
return lhs < rhs;
case Compare::LESS_THAN_EQUAL:
return lhs <= rhs;
case Compare::GREATER_THAN:
return lhs > rhs;
case Compare::GREATER_THAN_EQUAL:
return lhs >= rhs;
case Compare::NOT_EQUAL:
return lhs != rhs;
default:
assert(false);
throw std::runtime_error{"unsupported compare type requested"};
}
}
};
} // namespace BAM
......
......@@ -46,7 +46,8 @@ public:
HDF_SUBREAD,
REFERENCE,
SUBREAD,
TRANSCRIPT
TRANSCRIPT,
TRANSCRIPT_ALIGNMENT
};
/// \brief Converts printable dataset type to type enum.
......
......@@ -861,6 +861,16 @@ public:
TranscriptSet();
};
/// \brief The TranscriptAlignmentSet class represents a %TranscriptAlignmentSet
/// root element in DataSetXML.
///
class PBBAM_EXPORT TranscriptAlignmentSet : public DataSetBase
{
public:
/// \brief Creates an empty TranscriptAlignmentSet dataset.
TranscriptAlignmentSet();
};
} // namespace BAM
} // namespace PacBio
......
......@@ -35,7 +35,7 @@ public:
protected:
FilterBase(T value, const Compare::Type cmp);
FilterBase(std::vector<T> values);
FilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
protected:
bool CompareHelper(const T& lhs) const;
......@@ -54,7 +54,7 @@ struct BarcodeDataFilterBase : public FilterBase<T>
{
protected:
BarcodeDataFilterBase(T value, const Compare::Type cmp);
BarcodeDataFilterBase(std::vector<T> values);
BarcodeDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
public:
bool Accepts(const PbiRawData& idx, const size_t row) const;
......@@ -69,7 +69,7 @@ struct BasicDataFilterBase : public FilterBase<T>
{
protected:
BasicDataFilterBase(T value, const Compare::Type cmp);
BasicDataFilterBase(std::vector<T> values);
BasicDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
public:
bool Accepts(const PbiRawData& idx, const size_t row) const;
......@@ -84,7 +84,7 @@ struct MappedDataFilterBase : public FilterBase<T>
{
protected:
MappedDataFilterBase(T value, const Compare::Type cmp);
MappedDataFilterBase(std::vector<T> values);
MappedDataFilterBase(std::vector<T> values, const Compare::Type cmp = Compare::EQUAL);
public:
bool Accepts(const PbiRawData& idx, const size_t row) const;
......@@ -201,7 +201,7 @@ public:
///
/// \param[in] whitelist barcode IDs to compare on
///
PbiBarcodeFilter(std::vector<int16_t> whitelist);
PbiBarcodeFilter(std::vector<int16_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
public:
/// \brief Performs the actual index lookup.
......@@ -240,7 +240,8 @@ public:
///
/// \param[in] whitelist barcode IDs to compare on
///
PbiBarcodeForwardFilter(std::vector<int16_t> whitelist);
PbiBarcodeForwardFilter(std::vector<int16_t> whitelist,
const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiBarcodeQualityFilter class provides a PbiFilter-compatible
......@@ -288,7 +289,8 @@ public:
///
/// \param[in] whitelist barcode IDs to compare on
///
PbiBarcodeReverseFilter(std::vector<int16_t> whitelist);
PbiBarcodeReverseFilter(std::vector<int16_t> whitelist,
const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiBarcodesFilter class provides a PbiFilter-compatible filter on
......@@ -411,7 +413,7 @@ public:
/// \note There is no compare type parameter here, it is always
/// Compare::EQUAL. Records will match movie name, exactly.
///
PbiMovieNameFilter(const std::string& movieName);
PbiMovieNameFilter(const std::string& movieName, const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' movie name filter.
///
......@@ -421,7 +423,8 @@ public:
///
/// \param[in] whitelist movie names to compare on
///
PbiMovieNameFilter(const std::vector<std::string>& whitelist);
PbiMovieNameFilter(const std::vector<std::string>& whitelist,
const Compare::Type cmp = Compare::EQUAL);
public:
/// \brief Performs the actual index lookup.
......@@ -432,6 +435,7 @@ public:
private:
PbiFilter compositeFilter_;
Compare::Type cmp_;
};
/// \brief The PbiNumDeletedBasesFilter class provides a PbiFilter-compatible
......@@ -573,7 +577,7 @@ public:
/// \note There is no compare type parameter here, it is always
/// Compare::EQUAL. Records will match query name, exactly.
///
PbiQueryNameFilter(const std::string& qname);
PbiQueryNameFilter(const std::string& qname, const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' query name filter.
///
......@@ -583,7 +587,8 @@ public:
///
/// \param[in] whitelist query names to compare on
///
PbiQueryNameFilter(const std::vector<std::string>& whitelist);
PbiQueryNameFilter(const std::vector<std::string>& whitelist,
const Compare::Type cmp = Compare::EQUAL);
PbiQueryNameFilter(const PbiQueryNameFilter& other);
~PbiQueryNameFilter();
......@@ -686,7 +691,7 @@ public:
///
/// \param[in] whitelist read group IDs to compare on
///
PbiReadGroupFilter(std::vector<int32_t> whitelist);
PbiReadGroupFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' filter on read group printable IDs.
///
......@@ -696,7 +701,8 @@ public:
///
/// \param[in] whitelist read group ID strings to compare on
///
PbiReadGroupFilter(const std::vector<std::string>& whitelist);
PbiReadGroupFilter(const std::vector<std::string>& whitelist,
const Compare::Type cmp = Compare::EQUAL);
/// \brief Creates a 'whitelisted' filter using read group objects.
///
......@@ -706,7 +712,8 @@ public:
///
/// \param[in] whitelist read group objects to compare on
///
PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist);
PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist,
const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiReferenceEndFilter class provides a PbiFilter-compatible
......@@ -754,7 +761,7 @@ public:
///
/// \param[in] whitelist reference IDs to compare on
///
PbiReferenceIdFilter(std::vector<int32_t> whitelist);
PbiReferenceIdFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
};
/// \brief The PbiReferenceNameFilter class provides a PbiFilter-compatible
......@@ -782,7 +789,8 @@ public:
///
/// \param[in] whitelist reference names to compare on
///
PbiReferenceNameFilter(std::vector<std::string> whitelist);
PbiReferenceNameFilter(std::vector<std::string> whitelist,
const Compare::Type cmp = Compare::EQUAL);
public:
/// \brief Performs the actual index lookup.
......@@ -848,7 +856,39 @@ public:
///
/// \param[in] whitelist ZMW hole numbers to compare on
///
PbiZmwFilter(std::vector<int32_t> whitelist);
PbiZmwFilter(std::vector<int32_t> whitelist, const Compare::Type cmp = Compare::EQUAL);
};
// ----------------------------------------------
// NOTE: modulo filtering only enabled for ZMW.
//
// I need to generalize more if we're going to use
// this on more fields.
// ----------------------------------------------
enum class FilterHash
{
UNSIGNED_LONG_CAST,
BOOST_HASH_COMBINE,
};
struct PbiZmwModuloFilter
{
PbiZmwModuloFilter(const uint32_t denominator, const uint32_t value,
const FilterHash hashtype = FilterHash::UNSIGNED_LONG_CAST,
const Compare::Type = Compare::EQUAL);
/// \brief Performs the actual index lookup.
///
/// Most client code should not need to use this method directly.
///
bool Accepts(const PbiRawData& idx, const size_t row) const;
private:
uint32_t denominator_;
uint32_t value_;
FilterHash hash_;
Compare::Type cmp_;
};
} // namespace BAM
......
......@@ -8,6 +8,8 @@
#include <cassert>
#include <stdexcept>
#include <boost/functional/hash/hash.hpp>
namespace PacBio {
namespace BAM {
......@@ -20,8 +22,9 @@ inline FilterBase<T>::FilterBase(T value, const Compare::Type cmp)
{ }
template <typename T>
inline FilterBase<T>::FilterBase(std::vector<T> values)
inline FilterBase<T>::FilterBase(std::vector<T> values, const Compare::Type cmp)
: multiValue_{std::move(values)}
, cmp_{cmp}
{ }
template<typename T>
......@@ -35,6 +38,8 @@ inline bool FilterBase<T>::CompareHelper(const T& lhs) const
template<typename T>
inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
{
if (cmp_ == Compare::EQUAL)
{
// check provided value against all filter criteria,
// return true on any exact match
......@@ -46,21 +51,26 @@ inline bool FilterBase<T>::CompareMultiHelper(const T& lhs) const
}
return false; // no matches
}
else if (cmp_ == Compare::NOT_EQUAL)
{
// check provided value against all filter criteria,
// return true on any exact match
auto iter = multiValue_.get().cbegin();
const auto end = multiValue_.get().cend();
for (; iter != end; ++iter) {
if (*iter == lhs)
return false;
}
return true;
}
else
throw std::runtime_error{"unsupported compare type on multivalue filter"};
}
template<typename T>
inline bool FilterBase<T>::CompareSingleHelper(const T& lhs) const
{
switch(cmp_) {
case Compare::EQUAL: return lhs == value_;
case Compare::LESS_THAN: return lhs < value_;
case Compare::LESS_THAN_EQUAL: return lhs <= value_;
case Compare::GREATER_THAN: return lhs > value_;
case Compare::GREATER_THAN_EQUAL: return lhs >= value_;
case Compare::NOT_EQUAL: return lhs != value_;
default:
assert(false);
throw std::runtime_error{"unsupported compare type requested"};
}
return Compare::Check(lhs, value_, cmp_);
}
template<>
......@@ -90,8 +100,8 @@ inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(T value, const Com
{ }
template<typename T, PbiFile::BarcodeField field>
inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T> values)
: FilterBase<T>{std::move(values)}
inline BarcodeDataFilterBase<T, field>::BarcodeDataFilterBase(std::vector<T> values, const Compare::Type cmp)
: FilterBase<T>{std::move(values), cmp}
{ }
template<typename T, PbiFile::BarcodeField field>
......@@ -117,8 +127,8 @@ inline BasicDataFilterBase<T, field>::BasicDataFilterBase(T value, const Compare
{ }
template<typename T, PbiFile::BasicField field>
inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T> values)
: FilterBase<T>{std::move(values)}
inline BasicDataFilterBase<T, field>::BasicDataFilterBase(std::vector<T> values, const Compare::Type cmp)
: FilterBase<T>{std::move(values), cmp}
{ }
template<typename T, PbiFile::BasicField field>
......@@ -159,8 +169,8 @@ inline MappedDataFilterBase<T, field>::MappedDataFilterBase(T value, const Compa
{ }
template<typename T, PbiFile::MappedField field>
inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T> values)
: FilterBase<T>{std::move(values)}
inline MappedDataFilterBase<T, field>::MappedDataFilterBase(std::vector<T> values, const Compare::Type cmp)
: FilterBase<T>{std::move(values), cmp}
{ }
template<>
......@@ -233,9 +243,9 @@ inline PbiBarcodeFilter::PbiBarcodeFilter(const int16_t barcode, const Compare::
}
{ }
inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t> whitelist)
: compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist)},
PbiBarcodeReverseFilter{std::move(whitelist)}
inline PbiBarcodeFilter::PbiBarcodeFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
: compositeFilter_{ PbiFilter::Union({ PbiBarcodeForwardFilter{std::move(whitelist), cmp},
PbiBarcodeReverseFilter{std::move(whitelist), cmp}
})
}
{ }
......@@ -249,8 +259,8 @@ inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(const int16_t bcFwdId, c
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{bcFwdId, cmp}
{ }
inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t> whitelist)
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{std::move(whitelist)}
inline PbiBarcodeForwardFilter::PbiBarcodeForwardFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_FORWARD>{std::move(whitelist), cmp}
{ }
// PbiBarcodeQualityFilter
......@@ -265,8 +275,8 @@ inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(const int16_t bcRevId, c
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{bcRevId, cmp}
{ }
inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t> whitelist)
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{std::move(whitelist)}
inline PbiBarcodeReverseFilter::PbiBarcodeReverseFilter(std::vector<int16_t> whitelist, const Compare::Type cmp)
: internal::BarcodeDataFilterBase<int16_t, PbiFile::BarcodeField::BC_REVERSE>{std::move(whitelist), cmp}
{ }
// PbiBarcodesFilter
......@@ -308,7 +318,12 @@ inline PbiMapQualityFilter::PbiMapQualityFilter(const uint8_t mapQual, const Com
// PbiMovieNameFilter
inline bool PbiMovieNameFilter::Accepts(const PbiRawData& idx, const size_t row) const
{ return compositeFilter_.Accepts(idx, row); }
{
const bool found = compositeFilter_.Accepts(idx, row);
if (cmp_ == Compare::EQUAL) return found;
else if (cmp_ == Compare::NOT_EQUAL) return !found;
else throw std::runtime_error{"unsupported compare type on movie name filter"};
}
// PbiNumDeletedBasesFilter
......@@ -372,20 +387,20 @@ inline PbiReadGroupFilter::PbiReadGroupFilter(const ReadGroupInfo& rg, const Com
: PbiReadGroupFilter{rg.Id(), cmp}
{ }
inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t> whitelist)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::move(whitelist)}
inline PbiReadGroupFilter::PbiReadGroupFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::move(whitelist), cmp}
{ }
inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}}
inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<std::string>& whitelist, const Compare::Type cmp)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}, cmp}
{
multiValue_->reserve(whitelist.size());
for (const auto& rg : whitelist)
multiValue_->push_back(ReadGroupInfo::IdToInt(rg));
}
inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}}
inline PbiReadGroupFilter::PbiReadGroupFilter(const std::vector<ReadGroupInfo>& whitelist, const Compare::Type cmp)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::RG_ID>{std::vector<int32_t>{}, cmp}
{
multiValue_->reserve(whitelist.size());
for (const auto& rg : whitelist)
......@@ -404,8 +419,8 @@ inline PbiReferenceIdFilter::PbiReferenceIdFilter(const int32_t tId, const Compa
: internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{tId, cmp}
{ }
inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t> whitelist)
: internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{std::move(whitelist)}
inline PbiReferenceIdFilter::PbiReferenceIdFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
: internal::MappedDataFilterBase<int32_t, PbiFile::MappedField::T_ID>{std::move(whitelist), cmp}
{ }
// PbiReferenceStartFilter
......@@ -420,9 +435,70 @@ inline PbiZmwFilter::PbiZmwFilter(const int32_t zmw, const Compare::Type cmp)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{zmw, cmp}
{ }
inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t> whitelist)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{std::move(whitelist)}
inline PbiZmwFilter::PbiZmwFilter(std::vector<int32_t> whitelist, const Compare::Type cmp)
: internal::BasicDataFilterBase<int32_t, PbiFile::BasicField::ZMW>{std::move(whitelist), cmp}
{ }
// PbiZmwModuloFilter
inline PbiZmwModuloFilter::PbiZmwModuloFilter(
const uint32_t denominator,
const uint32_t value,
const FilterHash hashType,
const Compare::Type cmp)
: denominator_{denominator}
, value_{value}
, hash_{hashType}
, cmp_{cmp}
{ }
inline uint32_t UnsignedLongIntCast(const int32_t zm)
{
return static_cast<uint32_t>(zm);
}
inline uint32_t BoostHashCombine(const int32_t zm)
{
constexpr static const uint16_t mask = 0xFFFF;
const uint16_t upper = (zm >> 16) & mask;
const uint16_t lower = zm & mask;
// FIXME: discrepancies with Python API. Will return to nail down.
size_t seed = 0;
boost::hash_combine(seed, upper);
boost::hash_combine(seed, lower);
return static_cast<uint32_t>(seed);
}
inline bool PbiZmwModuloFilter::Accepts(const PbiRawData& idx,
const size_t row) const
{
const auto zm = idx.BasicData().holeNumber_.at(row);
uint32_t hashValue;
switch(hash_)
{
case FilterHash::UNSIGNED_LONG_CAST :
{
hashValue = UnsignedLongIntCast(zm);
break;
}
case FilterHash::BOOST_HASH_COMBINE :
{
hashValue = BoostHashCombine(zm);
break;
}
default:
throw std::runtime_error{"unsupported filter hash type"};
}
const auto modResult = hashValue % denominator_;
return Compare::Check(modResult, value_, cmp_);
}
} // namespace BAM
} // namespace PacBio
project(
'PacBioBAM',
'cpp',
version : '0.18.0',
version : '0.19.0',
default_options : [
'buildtype=release',
'warning_level=3',
......
......@@ -5,4 +5,26 @@ set -vex
# TEST #
########
type module >& /dev/null || . /mnt/software/Modules/current/init/bash
# Note: htslib v1.7 added native long CIGAR support. pbbam "spoofs" it
# when running <1.7. So we'll always check the default htslib for
# general test success/fail, and then check pre-/post-v1.7 explicitly
# to ensure we pass in either context (detectable at runtime).
# default htslib
ninja -C "${CURRENT_BUILD_DIR:-build}" -v test
# explicit htslib v1.6
module unload htslib
module load htslib/1.6
ninja -C "${CURRENT_BUILD_DIR:-build}" -v test
# explicit htslib v1.7
module unload htslib
module load htslib/1.7
ninja -C "${CURRENT_BUILD_DIR:-build}" -v test\
# restore default
module unload htslib
module load htslib
......@@ -12,19 +12,46 @@
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <tuple>
#include <utility>
#include <htslib/hts_endian.h>
#include "pbbam/BamTagCodec.h"
#include "BamRecordTags.h"
#include "MemoryUtils.h"
#include "pbbam/BamTagCodec.h"
#include "StringUtils.h"
namespace PacBio {
namespace BAM {
namespace {
static bool DoesHtslibSupportLongCigar()
{
const std::string htsVersion = hts_version();
// remove any "-<blah>" for non-release versions
const auto versionBase = PacBio::BAM::Split(htsVersion, '-');
if (versionBase.empty())
throw std::runtime_error{"invalid htslib version format: " + htsVersion};
// grab major/minor version numbers
const auto versionParts = PacBio::BAM::Split(versionBase[0], '.');
if (versionParts.size() < 2)
throw std::runtime_error{"invalid htslib version format: " + htsVersion};
// check against v1.7
const int versionMajor = std::stoi(versionParts[0]);
const int versionMinor = std::stoi(versionParts[1]);
static constexpr const int v17_major = 1;
static constexpr const int v17_minor = 7;
return std::tie(versionMajor, versionMinor) >= std::tie(v17_major, v17_minor);
}
static const bool has_native_long_cigar_support = DoesHtslibSupportLongCigar();
Cigar FetchRawCigar(const uint32_t* const src, const uint32_t len)
{
Cigar result;
......@@ -148,27 +175,21 @@ bool BamRecordImpl::AddTagImpl(const std::string& tagName, const Tag& value,
Cigar BamRecordImpl::CigarData() const
{
const auto* b = d_.get();
if (HasLongCigar(b)) {
if (!has_native_long_cigar_support && HasLongCigar(b)) {
// fetch long CIGAR from tag
const auto cigarTag = TagValue("CG");
const auto cigarTagValue = cigarTag.ToUInt32Array();
return FetchRawCigar(cigarTagValue.data(), cigarTagValue.size());
} else {
// fetch normal, short CIGAR from the standard location
// fetch CIGAR from the standard location
return FetchRawCigar(bam_get_cigar(b), b->core.n_cigar);
}
}
BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
{
// Set normal, "short" CIGAR and remove CG tag if present.
if (cigar.size() < 65536) {
SetCigarData(cigar);
if (HasTag("CG")) RemoveTag("CG");
}
// Set long CIGAR data
else {
// if long CIGAR, using htslib version < 1.7, set it "manually"
if (!has_native_long_cigar_support && cigar.size() >= 65536) {
// Add the 'fake' CIGAR in normal place.
Cigar fake;
fake.emplace_back(CigarOperationType::SOFT_CLIP, SequenceLength());
......@@ -190,6 +211,12 @@ BamRecordImpl& BamRecordImpl::CigarData(const Cigar& cigar)
AddTag("CG", Tag{cigarData});
}
// otherwise (v1.7+ or short CIGAR), use standard APIs
else {
if (HasTag("CG")) RemoveTag("CG");
SetCigarData(cigar);
}
return *this;
}
......
......@@ -25,12 +25,13 @@ namespace PacBio {
namespace BAM {
namespace internal {
class BamWriterPrivate : public internal::FileProducer
class BamWriterPrivate
{
public:
BamWriterPrivate(const std::string& filename, const std::shared_ptr<bam_hdr_t> rawHeader,
const BamWriter::CompressionLevel compressionLevel, const size_t numThreads,
const BamWriter::BinCalculationMode binCalculationMode);
const BamWriter::BinCalculationMode binCalculationMode,
const bool useTempFile);
public:
void Write(const BamRecord& record);
......@@ -41,21 +42,23 @@ public:
bool calculateBins_;
std::unique_ptr<samFile, internal::HtslibFileDeleter> file_;
std::shared_ptr<bam_hdr_t> header_;
std::unique_ptr<internal::FileProducer> fileProducer_;
};
BamWriterPrivate::BamWriterPrivate(const std::string& filename,
const std::shared_ptr<bam_hdr_t> rawHeader,
const BamWriter::CompressionLevel compressionLevel,
const size_t numThreads,
const BamWriter::BinCalculationMode binCalculationMode)
: internal::FileProducer{filename}
, calculateBins_{binCalculationMode == BamWriter::BinCalculation_ON}
, header_{rawHeader}
const BamWriter::BinCalculationMode binCalculationMode,
const bool useTempFile)
: calculateBins_{binCalculationMode == BamWriter::BinCalculation_ON}, header_{rawHeader}
{
if (!header_) throw std::runtime_error{"null header"};
if (useTempFile) fileProducer_ = std::make_unique<internal::FileProducer>(filename);
// open file
const auto usingFilename = TempFilename();
const auto usingFilename = (fileProducer_ ? fileProducer_->TempFilename() : filename);
const auto mode = std::string("wb") + std::to_string(static_cast<int>(compressionLevel));
file_.reset(sam_open(usingFilename.c_str(), mode.c_str()));
if (!file_) throw std::runtime_error{"could not open file for writing"};
......@@ -123,7 +126,7 @@ inline void BamWriterPrivate::Write(const BamRecordImpl& recordImpl)
BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
const BamWriter::CompressionLevel compressionLevel, const size_t numThreads,
const BinCalculationMode binCalculationMode)
const BinCalculationMode binCalculationMode, const bool useTempFile)
: IRecordWriter()
{
#if PBBAM_AUTOVALIDATE
......@@ -131,7 +134,18 @@ BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
#endif
d_ = std::make_unique<internal::BamWriterPrivate>(
filename, internal::BamHeaderMemory::MakeRawHeader(header), compressionLevel, numThreads,
binCalculationMode);
binCalculationMode, useTempFile);
}
BamWriter::BamWriter(const std::string& filename, const BamHeader& header,
const BamWriter::Config& config)
: BamWriter{filename,
header,
config.compressionLevel,
config.numThreads,
config.binCalculationMode,
config.useTempFile}
{
}
BamWriter::~BamWriter()
......
......@@ -16,6 +16,8 @@ namespace PacBio {
namespace BAM {
namespace internal {
// clang-format off
extern const ChemistryTable BuiltInChemistryTable = {
// BindingKit, SequencingKit, BasecallerVersion, Chemistry
......@@ -72,23 +74,21 @@ extern const ChemistryTable BuiltInChemistryTable = {
{{"101-365-900", "100-861-800", "5.0", "S/P2-C2/5.0"}},
{{"101-365-900", "101-093-700", "5.0", "S/P2-C2/5.0"}},
// 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet"); S/P2-C2
{{"101-365-900", "101-309-500", "5.0", "S/P2-C2/5.0"}},
// 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn)"); S/P2-C2
{{"101-365-900", "101-309-400", "5.0", "S/P2-C2/5.0"}},
// 5.0.1 ChemRel; Sequel® Binding Kit 2.1; S/P2-C2
{{"101-365-900", "101-309-500", "5.0", "S/P2-C2/5.0"}}, // Sequel® Sequencing Plate 2.1 Silwet (8 rxn)
{{"101-365-900", "101-309-400", "5.0", "S/P2-C2/5.0"}}, // Sequel® Sequencing Plate 2.1 Silwet (4 rxn)
// 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet - prototype parts"); S/P2-C2
{{"101-490-800", "101-490-900", "5.0", "S/P2-C2/5.0"}},
// 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn) - prototype parts"); S/P2-C2
{{"101-490-800", "101-491-000", "5.0", "S/P2-C2/5.0"}},
// 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet - prototype parts"); S/P2-C2
{{"101-500-400", "101-490-900", "5.0", "S/P2-C2/5.0"}},
// 5.0.1 ChemRel ("Sequel® Sequencing Plate Silwet (4 rxn) - prototype parts"); S/P2-C2
{{"101-500-400", "101-491-000", "5.0", "S/P2-C2/5.0"}}
// 5.0.1 ChemRel; Sequel® Binding Kit 3.0; S/P3-C3
{{"101-500-400", "101-427-500", "5.0", "S/P3-C3/5.0"}}, // Sequel® Sequencing Plate 3.0 (8 rxn)
{{"101-500-400", "101-427-800", "5.0", "S/P3-C3/5.0"}}, // Sequel® Sequencing Plate 3.0 (4 rxn)
// 5.0.1 ChemRel; Sequel® Dev Binding Kit; S/P2-C2
{{"101-490-800", "101-490-900", "5.0", "S/P2-C2/5.0"}}, // Sequel® Dev Sequencing Plate (4 rxn)
{{"101-490-800", "101-491-000", "5.0", "S/P2-C2/5.0"}}, // Sequel® Dev Sequencing Plate (8 rxn)
};
// clang-format on
ChemistryTable ChemistryTableFromXml(const std::string& mappingXml)
{
if (!FileUtils::Exists(mappingXml))
......
......@@ -44,8 +44,10 @@ static const std::unordered_map<std::string, Compare::Type> opToTypeMap =
{ "==", Compare::EQUAL },
{ "=", Compare::EQUAL },
{ "eq", Compare::EQUAL },
{ "in", Compare::EQUAL },
{ "!=", Compare::NOT_EQUAL },
{ "ne", Compare::NOT_EQUAL },
{ "not_in", Compare::NOT_EQUAL },
{ "<", Compare::LESS_THAN },
{ "lt", Compare::LESS_THAN },
{ "&lt;", Compare::LESS_THAN },
......
......@@ -93,6 +93,9 @@ DataSet::DataSet(const DataSet::TypeEnum type)
case DataSet::TRANSCRIPT:
d_ = std::make_unique<TranscriptSet>();
break;
case DataSet::TRANSCRIPT_ALIGNMENT:
d_ = std::make_unique<TranscriptAlignmentSet>();
break;
default:
throw std::runtime_error{"unsupported dataset type"};
}
......@@ -229,6 +232,7 @@ DataSet::TypeEnum DataSet::NameToType(const std::string& typeName)
lookup["ReferenceSet"] = DataSet::REFERENCE;
lookup["SubreadSet"] = DataSet::SUBREAD;
lookup["TranscriptSet"] = DataSet::TRANSCRIPT;
lookup["TranscriptAlignmentSet"] = DataSet::TRANSCRIPT_ALIGNMENT;
}
return lookup.at(typeName); // throws if unknown typename
}
......@@ -293,6 +297,8 @@ std::string DataSet::TypeToName(const DataSet::TypeEnum& type)
return "SubreadSet";
case DataSet::TRANSCRIPT:
return "TranscriptSet";
case DataSet::TRANSCRIPT_ALIGNMENT:
return "TranscriptAlignmentSet";
default:
throw std::runtime_error{"unsupported dataset type"};
}
......
......@@ -157,6 +157,8 @@ std::shared_ptr<DataSetBase> DataSetBase::Create(const std::string& typeName)
if (typeName == std::string("HdfSubreadSet")) return std::make_shared<HdfSubreadSet>();
if (typeName == std::string("ReferenceSet")) return std::make_shared<ReferenceSet>();
if (typeName == std::string("TranscriptSet")) return std::make_shared<TranscriptSet>();
if (typeName == std::string("TranscriptAlignmentSet"))
return std::make_shared<TranscriptAlignmentSet>();
// unknown typename
throw std::runtime_error{"unsupported dataset type"};
......@@ -437,5 +439,15 @@ TranscriptSet::TranscriptSet()
{
}
// -------------------
// TranscriptAlignmentSet
// -------------------
TranscriptAlignmentSet::TranscriptAlignmentSet()
: DataSetBase("PacBio.DataSet.TranscriptAlignmentSet", "TranscriptAlignmentSet",
XsdType::DATASETS)
{
}
} // namespace BAM
} // namespace PacBio
......@@ -103,6 +103,7 @@ static const auto elementRegistry = std::unordered_map<std::string, XsdType>
{ "SummaryStats", XsdType::DATASETS },
{ "TotalLength", XsdType::DATASETS },
{ "TranscriptSet", XsdType::DATASETS },
{ "TranscriptAlignmentSet",XsdType::DATASETS },
// 'pbmeta' elements
//
......