Commit 8ea0a47f authored by Andreas Tille's avatar Andreas Tille

Update upstream source from tag 'upstream/0.55.0'

Update to upstream version '0.55.0'
with Debian dir 4ea317a254c5894a9f20f6c756f9984bfd44f1b6
parents de9259b6 3b2c4045
Package: matrixStats
Version: 0.54.0
Version: 0.55.0
Depends: R (>= 2.12.0)
Suggests: base64enc, ggplot2, knitr, microbenchmark, R.devices, R.rsp
VignetteBuilder: R.rsp
Date: 2018-07-23
Title: Functions that Apply to Rows and Columns of Matrices (and to
Vectors)
Authors@R: c(
......@@ -15,7 +14,8 @@ Authors@R: c(
person("Harris", "Jaffee", role="ctb"),
person("Dongcan", "Jiang", role="ctb"),
person("Peter", "Langfelder", role="ctb"),
person("Peter", "Hickey", role="ctb"))
person("Peter", "Hickey", role="ctb"),
person("Brian", "Montgomery", role="ctb"))
Author: Henrik Bengtsson [aut, cre, cph],
Hector Corrada Bravo [ctb],
Robert Gentleman [ctb],
......@@ -23,7 +23,8 @@ Author: Henrik Bengtsson [aut, cre, cph],
Harris Jaffee [ctb],
Dongcan Jiang [ctb],
Peter Langfelder [ctb],
Peter Hickey [ctb]
Peter Hickey [ctb],
Brian Montgomery [ctb]
Maintainer: Henrik Bengtsson <henrikb@braju.com>
Description: High-performing functions operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). Functions optimized per data type and for subsetted calculations such that both memory usage and processing time are minimized. There are also optimized vector-based methods, e.g. binMeans(), madDiff() and weightedMedian().
License: Artistic-2.0
......@@ -32,7 +33,7 @@ NeedsCompilation: yes
ByteCompile: TRUE
URL: https://github.com/HenrikBengtsson/matrixStats
BugReports: https://github.com/HenrikBengtsson/matrixStats/issues
RoxygenNote: 6.0.1
Packaged: 2018-07-23 15:25:55 UTC; hb
RoxygenNote: 6.1.1
Packaged: 2019-09-06 06:24:24 UTC; hb
Repository: CRAN
Date/Publication: 2018-07-23 20:40:03 UTC
Date/Publication: 2019-09-07 16:50:15 UTC
This diff is collapsed.
This diff is collapsed.
......@@ -28,11 +28,9 @@
#' @export
indexByRow <- function(dim, idxs = NULL, ...) {
if (is.matrix(dim)) {
.Deprecated(msg = "indexByRow(x) where 'x' is a matrix is deprecated. Use indexByRow(dim(x)) instead.")
dim <- dim(dim)
} else {
dim <- as.integer(dim)
.Defunct(msg = "indexByRow(x) where 'x' is a matrix is defunct. Use indexByRow(dim(x)) instead.")
}
if (!is.null(idxs)) idxs <- as.integer(idxs)
.Call(C_indexByRow, dim, idxs)
.Call(C_indexByRow, as.integer(dim), idxs)
}
......@@ -34,7 +34,7 @@
#' @section Logical \code{value}:
#' When \code{value} is logical, the result is as if the function is applied
#' on \code{as.logical(x)}. More specifically, if \code{x} is numeric, then
#' all zeros are treates as \code{FALSE}, non-zero values as \code{TRUE},
#' all zeros are treated as \code{FALSE}, non-zero values as \code{TRUE},
#' and all missing values as \code{NA}.
#'
#' @example incl/rowAlls.R
......
......@@ -4,7 +4,7 @@
#' Cumulative sums, products, minima and maxima for each row (column) in a
#' matrix.
#'
#' @param x A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.
#' @param x An NxK \code{\link[base]{matrix}}.
#'
#' @param rows,cols A \code{\link[base]{vector}} indicating subset of elements
#' (or rows and/or columns) to operate over. If \code{\link[base]{NULL}}, no
......@@ -17,7 +17,9 @@
#' @param ... Not used.
#'
#' @return Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}
#' of the same mode as \code{x}.
#' of the same mode as \code{x}, except when \code{x} is of mode
#' \code{\link[base]{logical}}, then the return type is
#' \code{\link[base]{integer}}.
#'
#' @example incl/rowCumsums.R
#'
......
......@@ -34,7 +34,7 @@ rowMeans2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE,
na.rm <- as.logical(na.rm)
has_nas <- TRUE
return(.Call(C_rowMeans2, x, dim., rows, cols, na.rm, has_nas, TRUE))
.Call(C_rowMeans2, x, dim., rows, cols, na.rm, has_nas, TRUE)
}
#' @rdname rowMeans2
......@@ -45,5 +45,5 @@ colMeans2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE,
na.rm <- as.logical(na.rm)
has_nas <- TRUE
return(.Call(C_rowMeans2, x, dim., rows, cols, na.rm, has_nas, FALSE))
.Call(C_rowMeans2, x, dim., rows, cols, na.rm, has_nas, FALSE)
}
#' Gets the rank of each row (column) of a matrix
#' Gets the rank of the elements in each row (column) of a matrix
#'
#' Gets the rank of each row (column) of a matrix.
#'
#' The row ranks of \code{x} are collected as \emph{rows} of the result matrix.
#'
#' The column ranks of \code{x} are collected as \emph{rows} if
#' \code{preserveShape = FALSE}, otherwise as \emph{columns}.
#'
#' The implementation is optimized for both speed and memory. To avoid
#' coercing to \code{\link[base]{double}}s (and hence memory allocation), there
#' is a unique implementation for \code{\link[base]{integer}} matrices. It is
#' more memory efficient to do \code{colRanks(x, preserveShape = TRUE)} than
#' \code{t(colRanks(x, preserveShape = FALSE))}.
#'
#' Any \code{\link[base]{names}} of \code{x} are ignored and absent in the
#' result.
#' Gets the rank of the elements in each row (column) of a matrix.
#'
#' @param x A \code{\link[base]{numeric}} or \code{\link[base]{integer}} NxK
#' \code{\link[base]{matrix}}.
......@@ -36,40 +22,89 @@
#'
#' @param ... Not used.
#'
#' @return An \code{\link[base]{integer}} \code{\link[base]{matrix}} is
#' returned. The \code{rowRanks()} function always returns an NxK
#' @return A \code{\link[base]{matrix}} of type \code{\link[base]{integer}} is
#' returned, unless \code{ties.method = "average"} when it is of type
#' \code{\link[base]{numeric}}.
#'
#' The \code{rowRanks()} function always returns an NxK
#' \code{\link[base]{matrix}}, where N (K) is the number of rows (columns)
#' whose ranks are calculated.
#'
#' The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if
#' \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}.
#'
#' %% The mode of the returned matrix is \code{\link[base]{integer}}, except
#' for %% \code{ties.method == "average"} when it is
#' \code{\link[base]{double}}.
#' Any \code{\link[base]{names}} of \code{x} are ignored and absent in the
#' result.
#'
#' @section Missing and non- values: These are ranked as \code{NA}, as with
#' \code{na.last = "keep"} in the \code{\link[base]{rank}}() function.
#' @details
#' These functions rank values and treat missing values the same way as
#' \code{\link[base]{rank}}().
#' For equal values ("ties"), argument \code{ties.method} determines how these
#' are ranked among each other. More precisely, for the following values of
#' \code{ties.method}, each index set of ties consists of:
#' \itemize{
#' \item{\code{"first"} - increasing values that are all unique}
#' \item{\code{"last"} - decreasing values that are all unique}
#' \item{\code{"min"} - identical values equaling the minimum of
#' their original ranks}
#' \item{\code{"max"} - identical values equaling the maximum of
#' their original ranks}
#' \item{\code{"average"} - identical values that equal the sample mean of
#' their original ranks. Because the average is calculated, the returned
#' ranks may be non-integer values}
#' \item{\code{"random"} - randomly shuffled values of their original ranks.}
#' \item{\code{"dense"} - increasing values that are all unique and,
#' contrary to \code{"first"}, never contain any gaps}
#' }
#' For more information on \code{ties.method = "dense"}, see \code{frank()} of
#' the \pkg{data.table} package.
#' For more information on the other alternatives, see \code{\link[base]{rank}}().
#'
#' Note that, due to different randomization strategies, the shuffling order
#' produced by these functions when using \code{ties.method = "random"} does
#' not reproduce that of \code{\link[base]{rank}}().
#'
#' \emph{WARNING: For backward-compatibility reasons, the default is
#' \code{ties.method = "max"}, which differs from \code{\link[base]{rank}}()
#' which uses \code{ties.method = "average"} by default.
#' Since we plan to change the default behavior in a future version, we recommend
#' explicitly specifying the intended value of argument \code{ties.method}.}
#'
#' @section Missing values:
#' Missing values are ranked as \code{NA_integer_}, as with \code{na.last = "keep"}
#' in the \code{\link[base]{rank}}() function.
#'
#' @section Performance:
#' The implementation is optimized for both speed and memory. To avoid
#' coercing to \code{\link[base]{double}}s (and hence memory allocation),
#' there is a unique implementation for \code{\link[base]{integer}} matrices.
#' Furthermore, it is more memory efficient to do
#' \code{colRanks(x, preserveShape = TRUE)} than
#' \code{t(colRanks(x, preserveShape = FALSE))}.
#'
#' @author Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding
#' 'ties.method' support. Henrik Bengtsson adapted the original native
#' 'ties.method' support. Brian Montgomery for adding more 'ties.method's.
#' Henrik Bengtsson adapted the original native
#' implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in
#' the \pkg{Biobase} package.
#'
#' @seealso \code{\link[base]{rank}}(). For developers, see also Section
#' 'Utility functions' in 'Writing R Extensions manual', particularly the
#' @seealso
#' For developers, see also Section 'Utility functions' in
#' 'Writing R Extensions manual', particularly the
#' native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}.
#' @keywords array iteration robust univar
#'
#' @export
rowRanks <- function(x, rows = NULL, cols = NULL,
ties.method = c("max", "average", "min"),
# max is listed twice so that it remains the default for now
ties.method = c("max", "average", "first", "last", "random",
"max", "min", "dense"),
dim. = dim(x), ...) {
# Argument 'ties.method':
ties.method <- ties.method[1L]
ties_method <- charmatch(ties.method, c("max", "average", "min"),
nomatch = 0L)
ties_method <- charmatch(ties.method, c("average", "first", "last", "random",
"max", "min", "dense"), nomatch = 0L)
if (ties_method == 0L) {
stop("Unknown value of argument 'ties.method': ", ties.method)
}
......@@ -83,7 +118,9 @@ rowRanks <- function(x, rows = NULL, cols = NULL,
#' @rdname rowRanks
#' @export
colRanks <- function(x, rows = NULL, cols = NULL,
ties.method = c("max", "average", "min"),
# max is listed twice so that it remains the default for now
ties.method = c("max", "average", "first", "last", "random",
"max", "min", "dense"),
dim. = dim(x), preserveShape = FALSE, ...) {
# Argument 'ties.method':
ties.method <- ties.method[1L]
......@@ -91,8 +128,8 @@ colRanks <- function(x, rows = NULL, cols = NULL,
# Argument 'preserveShape'
preserveShape <- as.logical(preserveShape)
ties_method <- charmatch(ties.method, c("max", "average", "min"),
nomatch = 0L)
ties_method <- charmatch(ties.method, c("average", "first", "last", "random",
"max", "min", "dense"), nomatch = 0L)
if (ties_method == 0L) {
stop("Unknown value of argument 'ties.method': ", ties.method)
}
......
......@@ -34,7 +34,7 @@ rowSums2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE,
na.rm <- as.logical(na.rm)
has_nas <- TRUE
return(.Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, TRUE))
.Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, TRUE)
}
#' @rdname rowSums2
......@@ -45,5 +45,5 @@ colSums2 <- function(x, rows = NULL, cols = NULL, na.rm = FALSE,
na.rm <- as.logical(na.rm)
has_nas <- TRUE
return(.Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, FALSE))
.Call(C_rowSums2, x, dim., rows, cols, na.rm, has_nas, FALSE)
}
#' Tabulates the values in a matrix by row (column)
#'
#' Tabulates the values in a matrix by row (column).
#'
#'
#' @param x An \code{\link[base]{integer}}, a \code{\link[base]{logical}}, or
#' a \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}.
#'
......@@ -19,6 +16,17 @@
#' number of row (column) \code{\link[base]{vector}}s tabulated and J is the
#' number of values counted.
#'
#' @details
#' An alternative to these functions is to use \code{table(x, row(x))}
#' and \code{table(x, col(x))}, with the exception that the latter do not
#' support the \code{\link[base]{raw}} data type.
#' When there are no missing values in \code{x}, we have that
#' \code{all(rowTabulates(x) == t(table(x, row(x))))} and
#' \code{all(colTabulates(x) == t(table(x, col(x))))}.
#' When there are missing values, we have that
#' \code{all(rowTabulates(x) == t(table(x, row(x), useNA = "always")[, seq_len(nrow(x))]))} and
#' \code{all(colTabulates(x) == t(table(x, col(x), useNA = "always")[, seq_len(ncol(x))]))}.
#'
#' @example incl/rowTabulates.R
#'
#' @author Henrik Bengtsson
......
......@@ -85,7 +85,7 @@ weightedMad <- function(x, w = NULL, idxs = NULL, na.rm = FALSE,
# Remove values with zero (and negative) weight. This will:
# 1) take care of the case when all weights are zero,
# 2) it will most likely speed up the sorting.
tmp <- (w > 0)
tmp <- (is.na(w) | w > 0)
if (!all(tmp)) {
x <- .subset(x, tmp)
w <- .subset(w, tmp)
......@@ -95,7 +95,7 @@ weightedMad <- function(x, w = NULL, idxs = NULL, na.rm = FALSE,
# Drop missing values?
if (na.rm) {
keep <- which(!is.na(x) & !is.na(w))
keep <- which(!is.na(x))
x <- .subset(x, keep)
w <- .subset(w, keep)
n <- length(x)
......@@ -104,6 +104,9 @@ weightedMad <- function(x, w = NULL, idxs = NULL, na.rm = FALSE,
return(na_value)
}
# Missing values in 'w'?
if (anyMissing(w)) return(na_value)
# Are any weights Inf? Then treat them with equal weight and all others
# with weight zero.
tmp <- is.infinite(w)
......
......@@ -9,6 +9,7 @@
#' @param w a vector of weights the same length as \code{x} giving the weights
#' to use for each element of \code{x}. Negative weights are treated as zero
#' weights. Default value is equal weight to all values.
#' If a missing-value weight exists, the result is always a missing value.
#'
#' @param idxs A \code{\link[base]{vector}} indicating subset of elements to
#' operate over. If \code{\link[base]{NULL}}, no subsetting is done.
......@@ -30,7 +31,8 @@
#'
#' @example incl/weightedMean.R
#'
#' @section Missing values: This function handles missing values consistently
#' @section Missing values:
#' This function handles missing values consistently with
#' \code{\link[stats]{weighted.mean}}. More precisely, if \code{na.rm = FALSE},
#' then any missing values in either \code{x} or \code{w} will give result
#' \code{NA_real_}. If \code{na.rm = TRUE}, then all \code{(x, w)} data points
......
......@@ -53,6 +53,10 @@
#' If one or more weights are \code{Inf}, it is the same as if these weights all
#' have the same weight and the others have zero weight. This makes things easier
#' for cases where the weights are the result of a division by zero.
#'
#' If there are missing values in \code{w} that are part of the calculation
#' (after subsetting and dropping missing values in \code{x}), then the final
#' result is always \code{NA} of the same type as \code{x}.
#'
#' The weighted median solves the following optimization problem:
#'
......
......@@ -28,8 +28,16 @@
#'
#' @return Returns a \code{\link[base]{numeric}} scalar.
#'
#' @section Missing values: Missing values are dropped at the very beginning,
#' if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not.
#' @section Missing values:
#' This function handles missing values consistently with
#' \code{\link{weightedMean}}().
#' More precisely, if \code{na.rm = FALSE}, then any missing values in either
#' \code{x} or \code{w} will give result \code{NA_real_}.
#' If \code{na.rm = TRUE}, then all \code{(x, w)} data points for which
#' \code{x} is missing are skipped. Note that if both \code{x} and \code{w}
#' are missing for a data point, then it is also skipped (by the same rule).
#' However, if only \code{w} is missing, then the final result will always
#' be \code{NA_real_} regardless of \code{na.rm}.
#'
#' @author Henrik Bengtsson
#'
......@@ -76,11 +84,10 @@ weightedVar <- function(x, w = NULL, idxs = NULL, na.rm = FALSE,
na_value <- NA
storage.mode(na_value) <- storage.mode(x)
# Remove values with zero (and negative) weight. This will:
# 1) take care of the case when all weights are zero,
# 2) it will most likely speed up the sorting.
tmp <- (w > 0)
tmp <- (is.na(w) | w > 0)
if (!all(tmp)) {
x <- .subset(x, tmp)
w <- .subset(w, tmp)
......@@ -90,7 +97,7 @@ weightedVar <- function(x, w = NULL, idxs = NULL, na.rm = FALSE,
# Drop missing values?
if (na.rm) {
keep <- which(!is.na(x) & !is.na(w))
keep <- which(!is.na(x))
x <- .subset(x, keep)
w <- .subset(w, keep)
n <- length(x)
......@@ -98,6 +105,9 @@ weightedVar <- function(x, w = NULL, idxs = NULL, na.rm = FALSE,
} else if (anyMissing(x)) {
return(na_value)
}
# Missing values in 'w'?
if (anyMissing(w)) return(na_value)
# Are any weights Inf? Then treat them with equal weight and all others
# with weight zero.
......
No preview for this file type
al
AppVeyor
benchmarking
Benchmarking
binMeans
Centre
CMD
Cormen
Corrada
El
et
exponentials
Ghaoui
github
HenrikBengtsson
Hmisc
https
JxN
Koenker
Kx
KxJ
KxM
KxN
Leiserson
logsumexp
LSE
Lund
macOS
madDiff
MxJ
na
Nakayama
Neumann
Nx
NxJ
NxK
NxM
pre
Pre
Rivest
rowAlls
rowCounts
rowMedians
rowRanks
rowSds
Rtools
underflowing
von
weightedMedian
Xcode
xK
......@@ -166,7 +166,7 @@ hr {
<body>
<h1>matrixStats: Summary of functions</h1>
<p>Henrik Bengtsson on July 23, 2018</p>
<p>Henrik Bengtsson on NA</p>
<h2>Location and scale estimators</h2>
......@@ -390,7 +390,7 @@ hr {
<hr/>
<p>matrixStats v0.54.0. Release: <a href="https://cran.r-project.org/package=matrixStats">CRAN</a>, Development: <a href="https://github.com/HenrikBengtsson/matrixStats">GitHub</a>.</p>
<p>matrixStats v0.55.0. Release: <a href="https://cran.r-project.org/package=matrixStats">CRAN</a>, Development: <a href="https://github.com/HenrikBengtsson/matrixStats">GitHub</a>.</p>
</body>
......
......@@ -25,10 +25,10 @@ kable <- function(df, ...) {
fcns <- strsplit(fcns, split=",")
fcns <- sapply(fcns, FUN=function(names) {
names <- trim(names)
ok <- sapply(names, FUN=exists, envir=ns, mode="function")
names[ok] <- sprintf("%s()", names[ok])
names[!ok] <- sprintf("~~%s()~~", names[!ok])
names <- paste(names, collapse=", ")
ok <- sapply(names, FUN=exists, envir=ns, mode="function")
names[ok] <- sprintf("%s()", names[ok])
names[!ok] <- sprintf("~~%s()~~", names[!ok])
names <- paste(names, collapse=", ")
})
df$Functions <- fcns
df$Example <- sprintf("`%s`", df$Example)
......
......@@ -64,7 +64,7 @@ only for these calculations.
When \code{value} is logical, the result is as if the function is applied
on \code{as.logical(x)}. More specifically, if \code{x} is numeric, then
all zeros are treates as \code{FALSE}, non-zero values as \code{TRUE},
all zeros are treated as \code{FALSE}, non-zero values as \code{TRUE},
and all missing values as \code{NA}.
}
......
......@@ -29,7 +29,7 @@ rowCummaxs(x, rows = NULL, cols = NULL, dim. = dim(x), ...)
colCummaxs(x, rows = NULL, cols = NULL, dim. = dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{x}{An NxK \code{\link[base]{matrix}}.}
\item{rows, cols}{A \code{\link[base]{vector}} indicating subset of elements
(or rows and/or columns) to operate over. If \code{\link[base]{NULL}}, no
......@@ -43,7 +43,9 @@ length two specifying the dimension of \code{x}, also when not a
}
\value{
Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}
of the same mode as \code{x}.
of the same mode as \code{x}, except when \code{x} is of mode
\code{\link[base]{logical}}, then the return type is
\code{\link[base]{integer}}.
}
\description{
Cumulative sums, products, minima and maxima for each row (column) in a
......
......@@ -5,11 +5,11 @@
\alias{colMeans2}
\title{Calculates the mean for each row (column) in a matrix}
\usage{
rowMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
rowMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE,
dim. = dim(x), ...)
colMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
colMeans2(x, rows = NULL, cols = NULL, na.rm = FALSE,
dim. = dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} or a \code{\link[base]{logical}}
......
......@@ -5,11 +5,11 @@
\alias{colMedians}
\title{Calculates the median for each row (column) in a matrix}
\usage{
rowMedians(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
rowMedians(x, rows = NULL, cols = NULL, na.rm = FALSE,
dim. = dim(x), ...)
colMedians(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
colMedians(x, rows = NULL, cols = NULL, na.rm = FALSE,
dim. = dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
......
......@@ -5,11 +5,11 @@
\alias{colQuantiles}
\title{Estimates quantiles for each row (column) in a matrix}
\usage{
rowQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by
= 0.25), na.rm = FALSE, type = 7L, ..., drop = TRUE)
rowQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to =
1, by = 0.25), na.rm = FALSE, type = 7L, ..., drop = TRUE)
colQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to = 1, by
= 0.25), na.rm = FALSE, type = 7L, ..., drop = TRUE)
colQuantiles(x, rows = NULL, cols = NULL, probs = seq(from = 0, to =
1, by = 0.25), na.rm = FALSE, type = 7L, ..., drop = TRUE)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} with
......
......@@ -9,19 +9,23 @@
\alias{colMaxs}
\title{Gets the range of values in each row (column) of a matrix}
\usage{
rowRanges(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
rowRanges(x, rows = NULL, cols = NULL, na.rm = FALSE,
dim. = dim(x), ...)
rowMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
rowMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ...)
rowMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
rowMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ...)
colRanges(x, rows = NULL, cols = NULL, na.rm = FALSE,
dim. = dim(x), ...)
colRanges(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
colMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
colMins(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ...)
colMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ...)
colMaxs(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),
...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
......
......@@ -3,13 +3,15 @@
\name{rowRanks}
\alias{rowRanks}
\alias{colRanks}
\title{Gets the rank of each row (column) of a matrix}
\title{Gets the rank of the elements in each row (column) of a matrix}
\usage{
rowRanks(x, rows = NULL, cols = NULL, ties.method = c("max", "average",
"min"), dim. = dim(x), ...)
rowRanks(x, rows = NULL, cols = NULL, ties.method = c("max",
"average", "first", "last", "random", "max", "min", "dense"),
dim. = dim(x), ...)
colRanks(x, rows = NULL, cols = NULL, ties.method = c("max", "average",
"min"), dim. = dim(x), preserveShape = FALSE, ...)
colRanks(x, rows = NULL, cols = NULL, ties.method = c("max",
"average", "first", "last", "random", "max", "min", "dense"),
dim. = dim(x), preserveShape = FALSE, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} or \code{\link[base]{integer}} NxK
......@@ -33,49 +35,82 @@ length two specifying the dimension of \code{x}, also when not a
\code{x}, or not.}
}
\value{
An \code{\link[base]{integer}} \code{\link[base]{matrix}} is
returned. The \code{rowRanks()} function always returns an NxK
A \code{\link[base]{matrix}} of type \code{\link[base]{integer}} is
returned, unless \code{ties.method = "average"} when it is of type
\code{\link[base]{numeric}}.
The \code{rowRanks()} function always returns an NxK
\code{\link[base]{matrix}}, where N (K) is the number of rows (columns)
whose ranks are calculated.
The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if
\code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}.
%% The mode of the returned matrix is \code{\link[base]{integer}}, except
for %% \code{ties.method == "average"} when it is
\code{\link[base]{double}}.
Any \code{\link[base]{names}} of \code{x} are ignored and absent in the
result.
}
\description{
Gets the rank of each row (column) of a matrix.
Gets the rank of the elements in each row (column) of a matrix.
}
\details{
The row ranks of \code{x} are collected as \emph{rows} of the result matrix.
These functions rank values and treat missing values the same way as
\code{\link[base]{rank}}().
For equal values ("ties"), argument \code{ties.method} determines how these
are ranked among each other. More precisely, for the following values of
\code{ties.method}, each index set of ties consists of:
\itemize{
\item{\code{"first"} - increasing values that are all unique}
\item{\code{"last"} - decreasing values that are all unique}
\item{\code{"min"} - identical values equaling the minimum of
their original ranks}
\item{\code{"max"} - identical values equaling the maximum of
their original ranks}
\item{\code{"average"} - identical values that equal the sample mean of
their original ranks. Because the average is calculated, the returned
ranks may be non-integer values}
\item{\code{"random"} - randomly shuffled values of their original ranks.}
\item{\code{"dense"} - increasing values that are all unique and,
contrary to \code{"first"}, never contain any gaps}
}
For more information on \code{ties.method = "dense"}, see \code{frank()} of
the \pkg{data.table} package.
For more information on the other alternatives, see \code{\link[base]{rank}}().
The column ranks of \code{x} are collected as \emph{rows} if
\code{preserveShape = FALSE}, otherwise as \emph{columns}.
Note that, due to different randomization strategies, the shuffling order
produced by these functions when using \code{ties.method = "random"} does
not reproduce that of \code{\link[base]{rank}}().
The implementation is optimized for both speed and memory. To avoid
coercing to \code{\link[base]{double}}s (and hence memory allocation), there
is a unique implementation for \code{\link[base]{integer}} matrices. It is
more memory efficient to do \code{colRanks(x, preserveShape = TRUE)} than
\code{t(colRanks(x, preserveShape = FALSE))}.
\emph{WARNING: For backward-compatibility reasons, the default is
\code{ties.method = "max"}, which differs from \code{\link[base]{rank}}()
which uses \code{ties.method = "average"} by default.
Since we plan to change the default behavior in a future version, we recommend
explicitly specifying the intended value of argument \code{ties.method}.}
}
\section{Missing values}{
Any \code{\link[base]{names}} of \code{x} are ignored and absent in the
result.
Missing values are ranked as \code{NA_integer_}, as with \code{na.last = "keep"}
in the \code{\link[base]{rank}}() function.
}
\section{Missing and non- values}{
These are ranked as \code{NA}, as with
\code{na.last = "keep"} in the \code{\link[base]{rank}}() function.
\section{Performance}{
The implementation is optimized for both speed and memory. To avoid
coercing to \code{\link[base]{double}}s (and hence memory allocation),
there is a unique implementation for \code{\link[base]{integer}} matrices.
Furthermore, it is more memory efficient to do
\code{colRanks(x, preserveShape = TRUE)} than
\code{t(colRanks(x, preserveShape = FALSE))}.
}
\seealso{
\code{\link[base]{rank}}(). For developers, see also Section
'Utility functions' in 'Writing R Extensions manual', particularly the
For developers, see also Section 'Utility functions' in
'Writing R Extensions manual', particularly the
native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}.
}
\author{
Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding
'ties.method' support. Henrik Bengtsson adapted the original native
'ties.method' support. Brian Montgomery for adding more 'ties.method's.
Henrik Bengtsson adapted the original native
implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in
the \pkg{Biobase} package.
}
......
......@@ -7,11 +7,11 @@
\alias{colSds}
\title{Standard deviation estimates for each row (column) in a matrix}
\usage{
rowMads(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826,
na.rm = FALSE, dim. = dim(x), ...)
rowMads(x, rows = NULL, cols = NULL, center = NULL,
constant = 1.4826, na.rm = FALSE, dim. = dim(x), ...)
colMads(x, rows = NULL, cols = NULL, center = NULL, constant = 1.4826,
na.rm = FALSE, dim. = dim(x), ...)
colMads(x, rows = NULL, cols = NULL, center = NULL,
constant = 1.4826, na.rm = FALSE, dim. = dim(x), ...)
rowSds(x, rows = NULL, cols = NULL, na.rm = FALSE, center = NULL,
dim. = dim(x), ...)
......
......@@ -5,9 +5,11 @@
\alias{colSums2}
\title{Calculates the sum for each row (column) in a matrix}
\usage{
rowSums2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x), ...)
rowSums2(x, rows = NULL, cols = NULL, na.rm = FALSE, dim. = dim(x),