Commit 26e540cb authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 0.4-0

2012-01-12 Martin Maechler <>
* DESCRIPTION (Version): 0.3-1
(Depends): R >= 2.14.0, so we can
* man/getCall.Rd: remove entirely
* R/modelMatrix.R: ditto:
get rid of getCall (and all the new wrong R CMD check warnings).
2011-08-19 Martin Maechler <>
* DESCRIPTION (Version): 0.3-0
* R/modelMatrix.R:
* NAMESPACE: only define & export getCall() if R < 2.14
2011-02-17 Douglas Bates <>
* DESCRIPTION: Remove Encoding: directive.
2011-01-17 Douglas Bates <>
* R/modelMatrix.R: Spelling correction.
2010-11-27 Martin Maechler <>
* R/AllClass.R: comment stopifnot() which prevents INSTALL
2010-11-01 Douglas Bates <>
* R/AllClass.R (rMod,glrMod): Initial attempt at using reference
classes for the response modules. Still some problems with the
contains argument to setRefClass for GLMRespMod.
2010-08-23 Martin Maechler <>
* R/modelMatrix.R (model.Matrix, glm4): use argument
'drop.unused.levels' (and depend on Matrix version *-44).
2010-08-10 Martin Maechler <>
* R/AllGeneric.R, man/resid-et-al.Rd: define "ANY"-method (and
hence generic) for the three standard aliases
resid(), fitted.values() and coefficients(), and
* man/glm4.Rd: check some of these
2010-08-09 Douglas Bates <>
* R/modelMatrix.R: added "fitted" and "residuals" methods for
"respModule" classes. Modified corresponding methods for
"glpModel" to chain to these. Modified the reweightPred methods
to allow for ncol(sqrtXwt) > 1.
* DESCRIPTION (Version): 0.2-0, CRAN-released: ...
Package: MatrixModels
Version: 0.4-0
Date: 2015-01-13
Title: Modelling with Sparse And Dense Matrices
Author: Douglas Bates <> and Martin Maechler <>
Maintainer: Martin Maechler <>
Contact: Doug and Martin <>
Description: Modelling with sparse and dense 'Matrix' matrices, using
modular prediction and response module classes.
Depends: R (>= 2.14.0), utils
Imports: stats, methods, Matrix (>= 1.0-1)
Encoding: UTF-8
LazyLoad: yes
License: GPL (>= 2)
Packaged: 2015-01-13 11:52:51 UTC; maechler
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2015-01-14 06:22:36
4ec9cd332840cbabc09ba9c99fa80e3a *ChangeLog
5949b25fd175193a8189b9f3467f510e *DESCRIPTION
2658e5da3b0bcf26640950524ef4632d *NAMESPACE
7a7f7791efa7046efabbb67ab6e212ae *R/AllClass.R
412fdc8380154f704350319d8d0e42b4 *R/AllGeneric.R
602b0e257fb33e271659eb18b1d547e2 *R/modelMatrix.R
0d4a8c52756fab885df4ad945c144220 *man/Model-class.Rd
bb36643f358dd859d4239f8a2d13c838 *man/glm4.Rd
cb5bd701214e3700654c964b62fa70c2 *man/glpModel-class.Rd
f532227fdb1bcbc968a82f48ef00b3d0 *man/
1307c69cb686f9cf520baf1bea46ef60 *man/mkRespMod.Rd
346eee8ca51b20a83991469da5fe342a *man/model.Matrix.Rd
a24dafdb447ad0dfd8fb9ab88130fe4e *man/modelMatrix-class.Rd
a2d4fd05608ce09bcc24bf51f2e8aa0e *man/predModule-class.Rd
8949a8027658932d62b32c8b06c3a59f *man/resid-et-al.Rd
093c0a234847d812352d2e9768d18584 *man/respModule-class.Rd
4447a50545cdd04a3b61d824d9f499ef *man/reweightPred.Rd
7182cbd142e779b2992c67ee868aeb6b *man/solveCoef.Rd
8a94f53b5077878b35d87442c5267ed1 *man/updateMu.Rd
f71ab1edc84da9a6d7521563e7d6b900 *man/updateWts.Rd
97b70463118d303a8fbcd8057f399b6e *tests/MModels.R
##useDynLib(MatrixModels, .registration=TRUE)
## Import non-base functions we need explicitly,
## notably for which we define methods
## -- prefering importMethodsFrom(., ...) where applicable
## importFrom("graphics", image)
## importFrom("utils", head, tail)
importFrom("stats" ## potentially all these (we import into 'lme4a'):
# , anova
, coef, coefficients # confint, cov2cor, deviance,
, fitted, fitted.values
, formula # predict, profile
, model.extract, model.matrix, model.offset, model.response, model.weights
, residuals, resid # 'resid' needed too, unfortunately..
# , simulate, terms
, update
## according to codetoolsBioC :: writeNamespaceImports("MatrixModels"):
importClassesFrom("methods", ANY, call, character, environment, envRefClass,
integer, list, matrix, numeric, oldClass)
, callGeneric, as, is, extends, new
, getClass, getClassDef, validObject
, setClass, setClassUnion
, setGeneric
, setMethod, setOldClass
, setRefClass
, setValidity, slot, "slot<-", slotNames
, signature, representation, prototype)
## Those our methods and functions use:
importMethodsFrom("Matrix", as.matrix, as.vector, coerce,
## Group Methods
"Arith", "Compare", "Logic", "Math", "Math2", "Ops", "Summary",
t, "%*%", crossprod, tcrossprod,
Cholesky, chol, chol2inv,
summary, print,
update # notably the "CHMfactor" one
CHMfactor, CHMsimpl, CHMsuper, dCHMsimpl, dCHMsuper,
Cholesky, CholeskyFactorization,
compMatrix, corMatrix,
dgCMatrix, dgeMatrix, dMatrix,
dsparseMatrix, sparseMatrix, CsparseMatrix,
ddenseMatrix, denseMatrix,
generalMatrix, Matrix)
importFrom("Matrix", Diagonal, isLDL, sparse.model.matrix)
## Generics and functions defined in this package -------------------------
#TODO "",# <- "somewhat experimental"
, "solveCoef"
, "reweightPred"
, "updateMu"
, "updateWts"
## --- linear predictor modules, containing a model matrix
"dPredModule",# dense &
"sPredModule",# sparse (for now)
## --- response modules, containing a response vector, etc.
"respModule", # base class and also linear model
"glmRespMod", # generalized linear models
"nlsRespMod", # nonlinear regression response
"nglmRespMod", # generalized nonlinear
"glpModel", "Model"
exportMethods(## for both own and "other" generics:
## re-export S4 methods, for "stats"-S3-generics:
"coef", "coefficients" ## , "cov2cor"
,"fitted", "fitted.values", "formula"
,"residuals", "resid"
,"print"# print(x, ...) when show(x) is not sufficient
## not yet ,"summary"
.onLoad <- function(lib, pkg) {
options(max.print = 10000)#-> show() of large matrices
## Model Matrix
representation(assign = "integer",
contrasts = "list", "VIRTUAL"),
contains = "Matrix",
validity = function(object) {
if(length(object@assign) != (p <- ncol(object)))
return(gettextf("'%s' slot must be integer of length %d",
"assign", p))
contr <- object@contrasts <- sapply(contr, class, USE.NAMES=FALSE)
if(length(nc <- names(contr)) != length( || !all(nchar(nc) > 0))
return(gettextf("'%s' slot must be a correctly named list"))
## TODO? length(contrasts) < maximal value in 'assign' <= p
contrCls <- c("character", "function", "matrix", "Matrix")
if(any(unlist(lapply(, function(cl) all(, contrCls)))))))
return(gettextf("'%s' slot must be named list of contrast functions or their names, or matrices",
setClass("sparseModelMatrix", representation("VIRTUAL"),
contains = c("CsparseMatrix", "modelMatrix"))
setClass("denseModelMatrix", representation("VIRTUAL"),
contains = c("denseMatrix", "modelMatrix"))
## The currently only *actual* denseModelMatrix class:
setClass( "ddenseModelMatrix", contains = c("dgeMatrix", "ddenseMatrix", "denseModelMatrix"))
## here, add "ddense*": does not influence slots, but yields consistent superclass ordering
## The currently only *actual* sparseModelMatrix class:
setClass("dsparseModelMatrix", contains = c("dgCMatrix", "sparseModelMatrix"))
###------ Modules related to modelling --- basically in two parts --------------
###------ 1) "prediction-Module" -- currently in a sparse and dense flavor
###------ 2) "response-Module"
## Linear predictor modules, which consist of the model matrix, the
## coefficient vector and a triangular factor of the weighted model matrix.
## the super class contains the slots already;
representation(X = "modelMatrix", coef = "numeric", Vtr = "numeric",
fac = "CholeskyFactorization",
## the sub classes specify more specific classes for the two non-trivial slots:
setClass("dPredModule", contains = "predModule",
representation(X = "ddenseModelMatrix", fac = "Cholesky"))
setClass("sPredModule", contains = "predModule",
representation(X = "dsparseModelMatrix", fac = "CHMfactor"))
## Response modules for models with a linear predictor, which can
## include linear models, generalized linear models, nonlinear models
## and generalized nonlinear models.
## y, offset and mu are as expected. Note that length(offset) can be a multiple of length(y)
## weights are the prior weights
## sqrtrwt and sqrtXwt are the square roots of residual and X weights
representation(mu = "numeric", # of length n
offset = "numeric", # of length n * s
sqrtXwt = "matrix", # of dim(.) == (n, s)
sqrtrwt = "numeric", # sqrt(residual weights)
weights = "numeric", # prior weights
wtres = "numeric",
y = "numeric"),
validity = function(object) {
n <- length(object@y)
if (any(n != sapply(lapply(c("weights","sqrtrwt","mu","wtres"
), slot, object = object), length)))
return("lengths of weights, sqrtwt and mu must match length(y)")
lo <- length(object@offset)
if (!lo || lo %% n)
return("length(offset) must be a positive multiple of length(y)")
if (length(object@sqrtXwt) != lo)
return("length(sqrtXwt) must equal length(offset)")
if (nrow(object@sqrtXwt) != n)
return("nrow(sqrtXwt) != length(y)")
##' glm response module
representation(family = "family",
eta = "numeric",
n = "numeric"), # for evaluation of the aic
contains = "respModule",
validity = function(object) {
if (length(object@eta) != length(object@y))
return("lengths of eta and y must match")
##' nls response module
representation(nlenv = "environment",
nlmod = "call",
pnames = "character"),
contains = "respModule",
validity = function(object) {
n <- length(object@y)
N <- length(object@offset)
s <- N %/% n
lpn <- length(object@pnames)
if (lpn != s) return(sprintf("length(pnames) = %d != s = %d", lpn, s))
dd <- dim(object@sqrtXwt)
if (!all(dd == c(n, s))) {
return(sprintf("dim(gradient) = (%d, %d), n = %d, s = %d",
dd[1], dd[2], n, s))
##' nglm response module
setClass("nglmRespMod", contains = c("glmRespMod", "nlsRespMod"))
### FIXME: move this eventually to 'methods':
## -----
##' The mother class of all (S4 based) (statistical / physical / ...) models in R:
setClass("Model", representation(call = "call", fitProps = "list",
##' Statistical models based on linear predictors
##' "glpModel" := General Linear Prediction Models
setClass("glpModel", representation(resp = "respModule", pred = "predModule"),
contains = "Model")
rMod <-
fields = list(
mu = "numeric", # of length n
n = "integer", # for evaluation of the aic
offset = "numeric", # of length n * s
sqrtXwt = "matrix", # of dim(.) == (n, s)
sqrtrwt = "numeric", # sqrt(residual weights)
weights = "numeric", # prior weights
wtres = "numeric",
y = "numeric"),
methods = list(
initialize = function(...) {
if (length(n) == 0L) n <<- length(y)
s <- 0L
##currently fails at pkg INSTALL time: stopifnot(n > 0L)
if (length(weights) == 0L) weights <<- numeric(n) + 1
sqrtrwt <<- sqrt(weights)
if (any((dd <- dim(sqrtXwt)) < 1L))
sqrtXwt <<- matrix(1, ncol = 1L, nrow = n)
else {
stopifnot(nrow(sqrtXwt) == n)
s <- ncol(sqrtXwt)
swrk <- max(s, 1L)
if (length(offset) == 0) offset <<- numeric(n * swrk)
else {
so <- length(offset) %/% n
stopifnot(length(offset) %% n == 0, s == 0 || so == s)
wtres <<- mu <<- numeric(n) * NA_real_
updateMu = function(gamma) {
gamma <- as.numeric(gamma)
stopifnot(length(gamma) == length(offset))
mu <<- gamma + offset
wtres <<- sqrtrwt * (y - mu)
updateWts = function() {}
glrMod <-
fields = list(
family = "family",
eta = "numeric"),
contains = "RespModule",
methods = list(
initialize = function(...) {
args <- list(...)
stopifnot("family" %in% names(args), is(args$family, "family"))
family <<- args$family
updateMu = function(gamma) {
gamma <- as.numeric(gamma)
stopifnot(length(gamma) == length(offset))
mu <<- family$linkinv(eta <<- offset + gamma)
wtres <<- sqrtrwt * (y - mu)
updateWts = function() {
sqrtrwt <<- rtrwt <- sqrt(weights/family$variance(mu))
sqrtXwt[] <<- rtrwt * family$mu.eta(eta)
wtres <<- rtrwt * (y - mu)
##' Updates the mean vector mu given the linear predictor
##' gamma. Evaluate the residuals and the weighted sum of squared
##' residuals.
##' Note that the offset is added to the linear predictor before
##' calculating mu.
##' The sqrtXwt matrix can be updated but the sqrtrwt should not be in
##' that the weighted sum of squared residuals should be calculated
##' relative to fixed weights. Reweighting is done in a separate call.
##' @title Update the fitted mean response
##' @param respM a response module
##' @param gamma the value of the linear predictor before adding the offset
##' @param ...
##' @return updated respM
setGeneric("updateMu", function(respM, gamma, ...)
##' Update the weights, sqrtrwt and sqrtXwt
##' @title Update the residual and X weights
##' @param respM a response module
##' @param ...
##' @return updated response module
setGeneric("updateWts", function(respM, ...)
if (FALSE) { # don't need this generic in R
##' Set new values of the coefficients. Can be called with a single
##' vector argument and with a pair of vectors, representing a base and
##' an increment, plus a step factor.
##' @title set new values of the coefficients
##' @param predM a predictor module
##' @param base coefficient base value
##' @param incr increment
##' @param step step factor, defaults to 0 in which case incr is ignored
##' @param ...
##' @return predM
setGeneric("setCoef", function(predM, base, incr, step = 0, ...) standardGeneric("setCoef"))
##' Update any internal structures associated with sqrtXwt and the
##' weighted residuals. The "V" matrix is evaluated from X using the
##' sqrtXwt matrix and a Vtr vector is calculated.
##' @title Reweight Prediction Module Structure Internals
##' @param predM a predictor module
##' @param sqrtXwt the sqrtXwt matrix
##' @param wtres the vector of weighted residuals
##' @param ...
##' @return updated predM
setGeneric("reweightPred", function(predM, sqrtXwt, wtres, ...)
if (FALSE) { # don't need this generic in R
##' Return the gamma vector
##' @title
##' @param predM a predictor module
##' @param ...
##' @return X %*% coef
setGeneric("gammaInc", function(predM, ...)
##' Solve for the coefficients, usually in the form of
##' coef <- solve(predM@fac, predM@Vtr, system = "A")
##' The squared length of the intermediate solution is attached as an
##' attribute of the returned value.
##' @title solve for the coefficients or coefficient increment
##' @param predM
##' @param ...
##' @return coefficient vector or increment
setGeneric("solveCoef", function(predM, ...)
##------------ all these should wander to stats4 eventually: -----------------
## Make resid() into a reasonable S4 generic (still dispatching for S3):
setMethod("resid", "ANY", function(object, ...) residuals(object, ...))
## ditto for fitted.values() & coefficients():
setMethod("fitted.values", "ANY", function(object, ...) fitted(object, ...))
setMethod("coefficients", "ANY", function(object, ...) coef (object, ...))
This diff is collapsed.
%% "FIXME" --- move this to stats4
\title{Mother Class "Model" of all S4 Models}
Class \code{"Model"} is meant to be the mother class of all (S4) model
As some useful methods are already defined for \code{"Model"} objects,
derived classes inherit those \dQuote{for free}.
\section{Objects from the Class}{A virtual Class: No objects may be created from it.}
\item{\code{call}:}{the \code{\link{call}} which generated the model.}
\item{\code{fitProps}:}{a \code{\link{list}}; must be named,
i.e., have unique \code{\link{names}}, but can be empty.
When the main object is a \emph{fitted} model, the list will
typically have components such as \code{iter} (non-negative
integer) and \code{convergenece} (\code{\link{logical}} typically).
\item{formula}{\code{signature(x = "Model")}: extract the model
formula - if there is one, or \code{\link{NULL}}.}
\item{update}{\code{signature(object = "Model")}: Update the model
with a new formula, new data, \dots\dots etc. This semantically
equivalent (and as \R function almost identical) to the standard
\code{\link[stats]{update}} (package \pkg{stats}).}
\seealso{% as this will move to 'stats4':
the \code{\link[MatrixModels:glpModel-class]{glpModel}} class in package
\pkg{MatrixModels} which extends this class.
\title{Fitting Generalized Linear Models (using S4)}
\code{glm4}, very similarly as standard \R's \code{\link{glm}()} is
used to fit generalized linear models, specified by giving a symbolic
description of the linear predictor and a description of the error
It is more general, as it fits linear, generalized linear, non-linear
and generalized nonlinear models.
glm4(formula, family, data, weights, subset, na.action,
start = NULL, etastart, mustart, offset,
sparse = FALSE, drop.unused.levels = FALSE, doFit = TRUE,
control = list(\dots),
model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, \dots)
\arguments{%% much cut & pasted from glm.Rd :
\item{formula}{an object of class \code{"\link{formula}"} (or one that
can be coerced to that class): a symbolic description of the
model to be fitted. The details of model specification are given
under \sQuote{Details}.}
\item{family}{a description of the error distribution and link
function to be used in the model. This can be a character string
naming a family function, a family function or the result of a call
to a family function. (See \code{\link{family}} for details of
family functions.)}
\item{data}{an optional data frame, list or environment (or object
coercible by \code{\link{}} to a data frame) containing
the variables in the model. If not found in \code{data}, the
variables are taken from \code{environment(formula)},
typically the environment from which \code{glm} is called.}
\item{weights}{an optional vector of \sQuote{prior weights} to be used
in the fitting process. Should be \code{NULL} or a numeric vector.}
\item{subset}{an optional vector specifying a subset of observations
to be used in the fitting process.}
\item{na.action}{a function which indicates what should happen
when the data contain \code{NA}s. The default is set by
the \code{na.action} setting of \code{\link{options}}, and is
\code{\link{}} if that is unset. The \sQuote{factory-fresh}
default is \code{\link{na.omit}}. Another possible value is
\code{NULL}, no action. Value \code{\link{na.exclude}} can be useful.}
\item{start, etastart, mustart}{
starting values for the parameters in the linear predictor, the
predictor itself and for the vector of means.}
\item{offset}{this can be used to specify an \emph{a priori} known
component to be included in the linear predictor during fitting.
This should be \code{NULL} or a numeric vector of length equal to
the number of cases. One or more \code{\link{offset}} terms can be
included in the formula instead or as well, and if more than one is
specified their sum is used. See \code{\link{model.offset}}.}
\item{sparse}{logical indicating if the model matrix should be sparse
or not.}
\item{drop.unused.levels}{used only when \code{sparse} is TRUE: Should
factors have unused levels dropped?
(This used to be true, \emph{implicitly} in the first versions up to
July 2010; the default has been changed for compatibility with
\R's standard (dense) \code{\link{model.matrix}()}.
\item{doFit}{logical indicating if the model should be fitted (or just
returned unfitted).}
a list with options on fitting; currently passed unchanged to
(hidden) function \code{IRLS()}.}
\item{model, x, y}{currently ignored; here for back compatibility with
\item{contrasts}{currently ignored}%--- FIXME
\item{\dots}{potentially arguments passed on to fitter functions; not
used currently.}
% \details{
% ...............
% }
an object of class \code{\linkS4class{glpModel}}.
% \references{
% }
\code{\link{glm}()} the standard \R function;\cr
\code{\link{}()} a sparse least squares fitter.
The resulting class \code{\linkS4class{glpModel}} documentation.
### All the following is very experimental -- and probably will change: -------
data(CO2, package="datasets")
## dense linear model
str(glm4(uptake ~ 0 + Type*Treatment, data=CO2, doFit = FALSE), 4)
## sparse linear model
str(glm4(uptake ~ 0 + Type*Treatment, data=CO2, doFit = FALSE,
sparse = TRUE), 4)
## From example(glm): -----------------
## Dobson (1990) Page 93: Randomized Controlled Trial :
str(trial <- data.frame(counts=c(18,17,15,20,10,20,25,13,12),
glm.D93 <- glm(counts ~ outcome + treatment, family=poisson, data=trial)
c.glm <- unname(coef(glm.D93))
glmM <- glm4(counts ~ outcome + treatment, family = poisson, data=trial)
glmM2 <- update(glmM, quick = FALSE) # slightly more accurate
glmM3 <- update(glmM, quick = FALSE, finalUpdate = TRUE)
# finalUpdate has no effect on 'coef'
stopifnot( identical(glmM2@pred@coef, glmM3@pred@coef),
all.equal(glmM @pred@coef, c.glm, tolerance=1e-7),
all.equal(glmM2@pred@coef, c.glm, tolerance=1e-12))
All.eq <- function(x,y, ...) all.equal(x,y, tolerance= 1e-12, ...)
stopifnot( ## ensure typos are *caught* :
inherits(try(glm4(counts ~ outcome + treatment, family=poisson, data=trial,
fooBar = FALSE)), "try-error"),
## check formula(.): {environments differ - FIXME?}
formula(glmM) == formula(glm.D93),
identical(coef(glmM2), coefficients(glmM3)),
All.eq (coef(glmM2), coefficients(glm.D93)),
identical(fitted.values(glmM2), fitted(glmM3)),
All.eq (residuals(glmM2), resid(glm.D93), check.attributes=FALSE),# names()% FIXME ??
identical(residuals(glmM2), resid(glmM3))
## Watch the iterations --- and use no intercept --> more sparse X
## 1) dense generalized linear model
glmM <- glm4(counts ~ 0+outcome + treatment, poisson, trial,
verbose = TRUE)
## 2) sparse generalized linear model
glmS <- glm4(counts ~ 0+outcome + treatment, poisson, trial,
verbose = TRUE, sparse = TRUE)
str(glmS, max.lev = 4)
stopifnot( all.equal(glmM@pred@coef, glmS@pred@coef),
all.equal(glmM@pred@Vtr, glmS@pred@Vtr) )
## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2)
clotting <- data.frame(u = c(5,10,15,20,30,40,60,80,100),
lot1 = c(118,58,42,35,27,25,21,19,18),
lot2 = c(69,35,26,21,18,16,13,12,12))
str(gMN <- glm4(lot1 ~ log(u), data=clotting, family=Gamma, verbose=TRUE))
glm. <- glm(lot1 ~ log(u), data=clotting, family=Gamma)
stopifnot( all.equal(gMN@pred@coef, unname(coef(glm.)), tolerance=1e-7) )
\title{Class "glpModel" of General Linear Prediction Models}
The class \code{"glpModel"} conceptually contains a very large class
of \emph{\dQuote{General Linear Prediction Models}}.
Its \code{resp} slot (of class \code{"\linkS4class{respModule}"}) may
model linear, non-linear, generalized linear and non-linear
generalized response models.
\section{Objects from the Class}{
Objects can be created by calls of the form \code{new("glpModel", ...)},
but typically rather are returned by our modeling functions, e.g., the
(experimental, hence currently hidden) \code{glm4()}.
\item{\code{resp}:}{a \code{"\linkS4class{respModule}"} object.}
\item{\code{pred}:}{a \code{"\linkS4class{predModule}"} object.}
Class \code{"\linkS4class{Model}"}, directly.%%-- FIXME move that stats4
\item{coef}{\code{signature(object = "glpModel")}: extract the
coefficient vector \eqn{\beta} from the object.}
\item{fitted}{\code{signature(object = "glpModel")}: fitted values;