Commit 44176ef8 authored by Andreas Tille's avatar Andreas Tille

Updated version 0.52.2 from 'upstream/0.52.2'

with Debian dir 517835efa1c345085df462207068f117366092ed
parents e1435226 c5954558
Package: matrixStats
Version: 0.52.2
Depends: R (>= 2.12.0)
Suggests: base64enc, ggplot2, knitr, microbenchmark, R.devices, R.rsp
VignetteBuilder: R.rsp
Date: 2017-04-13
Title: Functions that Apply to Rows and Columns of Matrices (and to
Vectors)
Authors@R: c(
person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"),
email="henrikb@braju.com"),
person("Hector", "Corrada Bravo", role="ctb"),
person("Robert", "Gentleman", role="ctb"),
person("Ola", "Hossjer", role="ctb"),
person("Harris", "Jaffee", role="ctb"),
person("Dongcan", "Jiang", role="ctb"),
person("Peter", "Langfelder", role="ctb"))
Author: Henrik Bengtsson [aut, cre, cph], Hector Corrada Bravo [ctb], Robert Gentleman [ctb], Ola Hossjer [ctb], Harris Jaffee [ctb], Dongcan Jiang [ctb], Peter Langfelder [ctb]
Maintainer: Henrik Bengtsson <henrikb@braju.com>
Description: High-performing functions operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions optimized per data type and for subsetted calculations such that both memory usage and processing time are minimized. There are also optimized vector-based methods, e.g. binMeans(), madDiff() and weightedMedian().
License: Artistic-2.0
LazyLoad: TRUE
NeedsCompilation: yes
ByteCompile: TRUE
URL: https://github.com/HenrikBengtsson/matrixStats
BugReports: https://github.com/HenrikBengtsson/matrixStats/issues
RoxygenNote: 6.0.1
Packaged: 2017-04-13 07:54:02 UTC; hb
Repository: CRAN
Date/Publication: 2017-04-14 14:49:54 UTC
This diff is collapsed.
# Generated by roxygen2: do not edit by hand
export(allValue)
export(allocArray)
export(allocMatrix)
export(allocVector)
export(anyMissing)
export(anyValue)
export(binCounts)
export(binMeans)
export(colAlls)
export(colAnyMissings)
export(colAnyNAs)
export(colAnys)
export(colAvgsPerRowSet)
export(colCollapse)
export(colCounts)
export(colCummaxs)
export(colCummins)
export(colCumprods)
export(colCumsums)
export(colDiffs)
export(colIQRDiffs)
export(colIQRs)
export(colLogSumExps)
export(colMadDiffs)
export(colMads)
export(colMaxs)
export(colMeans2)
export(colMedians)
export(colMins)
export(colOrderStats)
export(colProds)
export(colQuantiles)
export(colRanges)
export(colRanks)
export(colSdDiffs)
export(colSds)
export(colSums2)
export(colTabulates)
export(colVarDiffs)
export(colVars)
export(colWeightedMads)
export(colWeightedMeans)
export(colWeightedMedians)
export(colWeightedSds)
export(colWeightedVars)
export(count)
export(diff2)
export(indexByRow)
export(iqr)
export(iqrDiff)
export(logSumExp)
export(madDiff)
export(mean2)
export(meanOver)
export(product)
export(rowAlls)
export(rowAnyMissings)
export(rowAnyNAs)
export(rowAnys)
export(rowAvgsPerColSet)
export(rowCollapse)
export(rowCounts)
export(rowCummaxs)
export(rowCummins)
export(rowCumprods)
export(rowCumsums)
export(rowDiffs)
export(rowIQRDiffs)
export(rowIQRs)
export(rowLogSumExps)
export(rowMadDiffs)
export(rowMads)
export(rowMaxs)
export(rowMeans2)
export(rowMedians)
export(rowMins)
export(rowOrderStats)
export(rowProds)
export(rowQuantiles)
export(rowRanges)
export(rowRanks)
export(rowSdDiffs)
export(rowSds)
export(rowSums2)
export(rowTabulates)
export(rowVarDiffs)
export(rowVars)
export(rowWeightedMads)
export(rowWeightedMeans)
export(rowWeightedMedians)
export(rowWeightedSds)
export(rowWeightedVars)
export(sdDiff)
export(signTabulate)
export(sum2)
export(sumOver)
export(t_tx_OP_y)
export(validateIndices)
export(varDiff)
export(weightedMad)
export(weightedMean)
export(weightedMedian)
export(weightedSd)
export(weightedVar)
export(x_OP_y)
importFrom(stats,mad)
importFrom(stats,median)
importFrom(stats,quantile)
useDynLib("matrixStats", .registration = TRUE, .fixes = "C_")
This diff is collapsed.
#' Package matrixStats
#'
#' High-performing functions operating on rows and columns of matrices, e.g.
#' col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions
#' optimized per data type and for subsetted calculations such that both memory
#' usage and processing time are minimized. There are also optimized
#' vector-based methods, e.g. binMeans(), madDiff() and weightedMedian().
#'
#' @section How to cite this package:
#' Henrik Bengtsson (2017). matrixStats: Functions that Apply to Rows and
#' Columns of Matrices (and to Vectors). R package version 0.52.2.
#' https://github.com/HenrikBengtsson/matrixStats
#'
#' @author Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola
#' Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder
#'
#' @keywords package
#'
#' @name matrixStats-package
#' @aliases matrixStats
#' @docType package
NULL
#' Allocates an empty vector, matrix or array
#'
#' Allocates an empty vector, matrix or array faster than the corresponding
#' function in R.
#'
#'
#' @param value A \code{\link[base]{numeric}} scalar that all elements will
#' have as value.
#'
#' @param ... Not used.
#'
#' @param length,nrow,ncol,dim \code{\link[base]{numeric}}s specifying the
#' dimension of the created \code{\link[base]{vector}},
#' \code{\link[base]{matrix}} or \code{\link[base]{array}}.
#'
#' @return Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and
#' \code{\link[base]{array}} respectively of the same data type as
#' \code{value}.
#'
#' @author Henrik Bengtsson
#'
#' @seealso See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and
#' \code{\link[base]{array}}.
#'
#' @keywords internal programming
#'
#' @export
allocMatrix <- function(nrow, ncol, value = 0.0, ...) {
  # Both dimensions are coerced to integer on their way into the native
  # routine; 'value' is forwarded untouched.
  .Call(C_allocMatrix2, as.integer(nrow), as.integer(ncol), value)
}
#' @rdname allocMatrix
#' @export
allocVector <- function(length, value = 0.0, ...) {
  # The requested length is coerced to integer on its way into the native
  # routine; 'value' is forwarded untouched.
  .Call(C_allocVector2, as.integer(length), value)
}
#' @rdname allocMatrix
#' @export
allocArray <- function(dim, value = 0.0, ...) {
  # The dimension vector is coerced to integer on its way into the native
  # routine; 'value' is forwarded untouched.
  .Call(C_allocArray2, as.integer(dim), value)
}
#' Checks if there are any missing values in an object or not
#'
#' Checks if there are any missing values in an object or not.
#' \emph{Please use \code{base::anyNA()} instead of \code{anyMissing()},
#' \code{colAnyNAs()} instead of \code{colAnyMissings()}, and
#' \code{rowAnyNAs()} instead of \code{rowAnyMissings()}.}
#'
#' The implementation of this method is optimized for both speed and memory.
#' The method will return \code{\link[base:logical]{TRUE}} as soon as a missing
#' value is detected.
#'
#' @param x A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a
#' \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or
#' \code{\link[base]{NULL}}.
#'
#' @param idxs,rows,cols A \code{\link[base]{vector}} indicating subset of
#' elements (or rows and/or columns) to operate over. If
#' \code{\link[base]{NULL}}, no subsetting is done.
#' @param ... Not used.
#'
#' @return Returns \code{\link[base:logical]{TRUE}} if a missing value was
#' detected, otherwise \code{\link[base:logical]{FALSE}}.
#'
#' @examples
#' x <- rnorm(n = 1000)
#' x[seq(300, length(x), by = 100)] <- NA
#' stopifnot(anyMissing(x) == any(is.na(x)))
#'
#' @author Henrik Bengtsson
#'
#' @seealso Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base},
#' which provides the same functionality as \code{anyMissing()}.
#'
#' @keywords iteration logic
#' @export
anyMissing <- function(x, idxs = NULL, ...) {
  # Anything that is not a list (this includes NULL) is checked with a
  # single native call.
  if (!is.list(x)) {
    return(.Call(C_anyMissing, x, idxs))
  }

  # Lists and data frames: check element by element, with the same 'idxs'
  # applied to each, and stop at the first element that reports a missing
  # value.
  for (ii in seq_along(x)) {
    found <- .Call(C_anyMissing, x[[ii]], idxs)
    if (found) {
      return(TRUE)
    }
  }

  FALSE
}
#' @rdname anyMissing
#' @export
colAnyMissings <- function(x, rows = NULL, cols = NULL, ...) {
  # Delegates to colAnys(), asking it to look for the value NA; the optional
  # row/column subsets and any extra arguments are passed through.
  colAnys(x, rows = rows, cols = cols, value = NA, ...)
}
#' @rdname anyMissing
#' @export
rowAnyMissings <- function(x, rows = NULL, cols = NULL, ...) {
  # Delegates to rowAnys(), asking it to look for the value NA; the optional
  # row/column subsets and any extra arguments are passed through.
  rowAnys(x, rows = rows, cols = cols, value = NA, ...)
}
#' @rdname anyMissing
#' @export
colAnyNAs <- function(x, rows = NULL, cols = NULL, ...) {
  # Delegates to colAnys(), asking it to look for the value NA; the optional
  # row/column subsets and any extra arguments are passed through.
  colAnys(x, rows = rows, cols = cols, value = NA, ...)
}
#' @rdname anyMissing
#' @export
rowAnyNAs <- function(x, rows = NULL, cols = NULL, ...) {
  # Delegates to rowAnys(), asking it to look for the value NA; the optional
  # row/column subsets and any extra arguments are passed through.
  rowAnys(x, rows = rows, cols = cols, value = NA, ...)
}
benchmark <- function(fcn, tags = NULL, path = NULL, workdir = "reports",
                      envir = parent.frame(), ...) {
  # Renders the benchmark report template "<fcn>[, <tags>].md.rsp" found
  # under 'path' via R.rsp::rfile(), using 'workdir' and 'envir' as given and
  # forwarding any further arguments.

  # R.rsp is loaded on demand; fail early if it is unavailable.
  if (!requireNamespace("R.rsp")) {
    stop("R.rsp not installed.")
  }

  # A function object is replaced by the expression the caller typed for it,
  # so that it can be used as part of the template file name.
  if (is.function(fcn)) {
    fcn <- deparse(substitute(fcn))
  }

  # Default template location: the 'benchmarking' directory of the installed
  # matrixStats package.
  if (is.null(path)) {
    path <- system.file("benchmarking", package = "matrixStats")
  }

  # Template file name: function name and tags joined by ", ".
  report_name <- paste(c(fcn, tags), collapse = ", ")
  pathname <- file.path(path, sprintf("%s.md.rsp", report_name))

  # Use a fixed prompt while the report is rendered and restore the caller's
  # options when this function exits.
  oopts <- options(prompt = "> ")
  on.exit(options(oopts))

  R.rsp::rfile(pathname, workdir = workdir, envir = envir, ...)
}
#' Fast element counting in non-overlapping bins
#'
#' Counts the number of elements in non-overlapping bins
#'
#' \code{binCounts(x, bx = bx, right = TRUE)} gives results equivalent to
#' \code{rev(binCounts(-x, bx = rev(-bx), right = FALSE))}, but is faster
#' and more memory efficient.
#'
#' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K
#' positions to be binned and counted.
#'
#' @param idxs A \code{\link[base]{vector}} indicating subset of elements to
#' operate over. If \code{\link[base]{NULL}}, no subsetting is done.
#'
#' @param bx A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1
#' ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])},
#' \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}.
#'
#' @param right If \code{\link[base:logical]{TRUE}}, the bins are right-closed
#' (left open), otherwise left-closed (right open).
#'
#' @param ... Not used.
#'
#' @return Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of
#' length B with non-negative integers.
#'
#' @section Missing and non-finite values:
#' Missing values in \code{x} are ignored/dropped. Missing values in \code{bx}
#' are not allowed and give an error.
#'
#' @author Henrik Bengtsson
#'
#' @seealso An alternative for counting occurrences within bins is
#' \code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks = bx,
#' plot = FALSE)$counts}. That approach is ~30-60\% slower than
#' \code{binCounts(..., right = TRUE)}.
#'
#' To count occurrences of indices \code{x} (positive
#' \code{\link[base]{integer}}s) in \code{[1, B]}, use \code{tabulate(x,
#' nbins = B)}, where \code{x} does \emph{not} have to be sorted first. For
#' details, see \code{\link[base]{tabulate}}().
#'
#' To average values within bins, see \code{\link{binMeans}}().
#'
#' @keywords univar
#' @export
binCounts <- function(x, idxs = NULL, bx, right = FALSE, ...) {
  # ---- Argument checks ------------------------------------------------
  # 'x' must be numeric.
  if (!is.numeric(x)) {
    stop("Argument 'x' is not numeric: ", mode(x))
  }

  # 'bx' must be numeric, free of infinite values, and ordered.
  if (!is.numeric(bx)) {
    stop("Argument 'bx' is not numeric: ", mode(bx))
  }
  if (any(is.infinite(bx))) {
    stop("Argument 'bx' must not contain Inf values.")
  }
  if (is.unsorted(bx)) {
    stop("Argument 'bx' is not ordered.")
  }

  # ---- Optional subsetting --------------------------------------------
  if (!is.null(idxs)) {
    x <- x[idxs]
  }

  right <- as.logical(right)

  # ---- Preprocessing of the positions ---------------------------------
  # Discard missing positions; the copy is skipped when there are none.
  is_missing <- is.na(x)
  if (any(is_missing)) {
    x <- x[!is_missing]
  }
  is_missing <- NULL  # Not needed anymore

  # Sort the positions in increasing order and hand everything to the
  # native routine as doubles.
  positions <- as.numeric(sort.int(x, method = "quick"))
  breaks <- as.numeric(bx)

  .Call(C_binCounts, positions, breaks, right)
}
#' Fast mean calculations in non-overlapping bins
#'
#' Computes the sample means in non-overlapping bins
#'
#' \code{binMeans(y, x, bx = bx, right = TRUE)} gives results equivalent to
#' \code{rev(binMeans(y, -x, bx = sort(-bx), right = FALSE))}, but is faster.
#'
#' @param y A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K
#' values to calculate means on.
#'
#' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K
#' positions to be binned.
#'
#' @param idxs A \code{\link[base]{vector}} indicating subset of elements to
#' operate over. If \code{\link[base]{NULL}}, no subsetting is done.
#'
#' @param bx A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B + 1
#' ordered positions specifying the B > 0 bins \code{[bx[1], bx[2])},
#' \code{[bx[2], bx[3])}, ..., \code{[bx[B], bx[B + 1])}.
#'
#' @param na.rm If \code{\link[base:logical]{TRUE}}, missing values in \code{y}
#' are dropped before calculating the mean, otherwise not.
#'
#' @param count If \code{\link[base:logical]{TRUE}}, the number of data points
#' in each bin is returned as attribute \code{count}, which is an
#' \code{\link[base]{integer}} \code{\link[base]{vector}} of length B.
#'
#' @param right If \code{\link[base:logical]{TRUE}}, the bins are right-closed
#' (left open), otherwise left-closed (right open).
#'
#' @param ... Not used.
#'
#' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of
#' length B.
#'
#' @section Missing and non-finite values:
#' Data points where \code{x} is missing are dropped, as are data points where
#' \code{y} is missing when \code{na.rm = TRUE} (dropped data points are
#' therefore not counted either). Infinite values in \code{y} are not allowed
#' and give an error. Missing values in \code{bx} are not allowed and give an
#' error.
#'
#' @example incl/binMeans.R
#'
#' @author Henrik Bengtsson with initial code contributions by
#' Martin Morgan [1].
#'
#' @seealso \code{\link{binCounts}}(). \code{\link[stats]{aggregate}} and
#' \code{\link[base]{mean}}().
#'
#' @references [1] R-devel thread \emph{Fastest non-overlapping binning mean
#' function out there?} on Oct 3, 2012\cr
#'
#' @keywords univar
#' @export
binMeans <- function(y, x, idxs = NULL, bx, na.rm = TRUE, count = TRUE,
                     right = FALSE, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'y':
  if (!is.numeric(y)) {
    stop("Argument 'y' is not numeric: ", mode(y))
  }
  if (any(is.infinite(y))) {
    stop("Argument 'y' must not contain Inf values.")
  }
  n <- length(y)

  # Argument 'x':
  if (!is.numeric(x)) {
    stop("Argument 'x' is not numeric: ", mode(x))
  }
  if (length(x) != n) {
    stop("Argument 'y' and 'x' are of different lengths: ",
         length(y), " != ", length(x))
  }

  # Argument 'bx':
  if (!is.numeric(bx)) {
    stop("Argument 'bx' is not numeric: ", mode(bx))
  }
  if (any(is.infinite(bx))) {
    stop("Argument 'bx' must not contain Inf values.")
  }
  if (is.unsorted(bx)) {
    stop("Argument 'bx' is not ordered.")
  }

  # Argument 'na.rm':
  if (!is.logical(na.rm)) {
    stop("Argument 'na.rm' is not logical: ", mode(na.rm))
  }

  # Argument 'count':
  if (!is.logical(count)) {
    stop("Argument 'count' is not logical: ", mode(count))
  }

  # Apply subset
  if (!is.null(idxs)) {
    x <- x[idxs]
    y <- y[idxs]
    # The subset may hold fewer or more (repeated indices) elements than
    # the input.  Refresh 'n' so the "anything to drop?" comparisons below
    # are made against the current length; with a stale 'n', missing values
    # could slip through when 'idxs' lengthens the data.
    n <- length(y)
  }

  # Argument 'right':
  right <- as.logical(right)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Preprocessing of (x, y)
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Drop missing values in 'x' (always), keeping (x, y) pairs aligned
  keep <- which(!is.na(x))
  if (length(keep) < n) {
    x <- x[keep]
    y <- y[keep]
    n <- length(y)
  }
  keep <- NULL # Not needed anymore

  # Drop missing values in 'y'?
  if (na.rm) {
    keep <- which(!is.na(y))
    if (length(keep) < n) {
      x <- x[keep]
      y <- y[keep]
    }
    keep <- NULL # Not needed anymore
  }

  # Order (x, y) by increasing x.
  # If 'x' is already sorted, the overhead of (re)sorting is
  # relatively small.  'x' holds no missing values at this point, so the
  # returned permutation indexes 'y' one-to-one.
  x <- sort.int(x, method = "quick", index.return = TRUE)
  y <- y[x$ix]
  x <- x$x

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Bin
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  y <- as.numeric(y)
  x <- as.numeric(x)
  bx <- as.numeric(bx)
  count <- as.logical(count)
  .Call(C_binMeans, y, x, bx, count, right)
}
#' Fast lagged differences
#'
#' Computes the lagged and iterated differences.
#'
#' @param x A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length
#' N.
#'
#' @param idxs A \code{\link[base]{vector}} indicating subset of elements to
#' operate over. If \code{\link[base]{NULL}}, no subsetting is done.
#'
#' @param lag An \code{\link[base]{integer}} specifying the lag.
#'
#' @param differences An \code{\link[base]{integer}} specifying the order of
#' difference.
#'
#' @param ... Not used.
#'
#' @return Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of
#' length N - \code{differences}.
#'
#' @examples
#' diff2(1:10)
#'
#' @author Henrik Bengtsson
#'
#' @seealso \code{\link[base]{diff}}().
#' @keywords univar internal
#'
#' @export
diff2 <- function(x, idxs = NULL, lag = 1L, differences = 1L, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -