Commit ea0e2b60 authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 1.3-28

parent bb0260c9
Package: mgcv Package: mgcv
Version: 1.3-27 Version: 1.3-28
Author: Simon Wood <simon.wood@r-project.org> Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org> Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV smoothness estimation and GAMMs by REML/PQL Title: GAMs with GCV smoothness estimation and GAMMs by REML/PQL
Description: Routines for GAMs and other generalized ridge regression Description: Routines for GAMs and other generalized ridge regression
with multiple smoothing parameter selection by GCV or UBRE. with multiple smoothing parameter selection by GCV or
Also GAMMs by REML or PQL. Includes a gam() function. UBRE/AIC. Also GAMMs by REML or PQL. Includes a gam()
function.
Priority: recommended Priority: recommended
Depends: R (>= 2.3.0) Depends: R (>= 2.3.0)
Imports: graphics, stats Imports: graphics, stats
Suggests: nlme (>= 3.1-64), MASS (>= 7.2-2) Suggests: nlme (>= 3.1-64), MASS (>= 7.2-2)
LazyLoad: yes LazyLoad: yes
License: GPL version 2 or later License: GPL version 2 or later
Packaged: Wed Sep 26 22:48:19 2007; simon Packaged: Wed Oct 10 10:43:12 2007; simon
...@@ -848,14 +848,14 @@ extract.lme.cov<-function(b,data,start.level=1) ...@@ -848,14 +848,14 @@ extract.lme.cov<-function(b,data,start.level=1)
formXtViX <- function(V,X) formXtViX <- function(V,X)
## forms X'V^{-1}X as efficiently as possible given the structure of ## forms X'V^{-1}X as efficiently as possible given the structure of
## V (diagonal, block-diagonal, full) ## V (diagonal, block-diagonal, full)
{ X <- X[V$ind,] # have to re-order X according to V ordering { X <- X[V$ind,,drop=FALSE] # have to re-order X according to V ordering
if (is.list(V$V)) { ### block diagonal case if (is.list(V$V)) { ### block diagonal case
Z <- X Z <- X
j0<-1 j0<-1
for (i in 1:length(V$V)) for (i in 1:length(V$V))
{ Cv <- chol(V$V[[i]]) { Cv <- chol(V$V[[i]])
j1 <- j0+nrow(V$V[[i]])-1 j1 <- j0+nrow(V$V[[i]])-1
Z[j0:j1,]<-backsolve(Cv,as.matrix(X[j0:j1,]),transpose=TRUE) Z[j0:j1,]<-backsolve(Cv,X[j0:j1,,drop=FALSE],transpose=TRUE)
j0 <- j1 + 1 j0 <- j1 + 1
} }
res <- t(Z)%*%Z res <- t(Z)%*%Z
...@@ -883,6 +883,69 @@ new.name <- function(proposed,old.names) ...@@ -883,6 +883,69 @@ new.name <- function(proposed,old.names)
prop prop
} }
gammPQL <- function (fixed, random, family, data, correlation, weights,
control, niter = 30, verbose = TRUE, ...)
## service routine for `gamm' to do PQL fitting. Based on glmmPQL
## from the MASS library (Venables & Ripley). In particular, for back
## compatibility the numerical results should be identical with gamm
## fits by glmmPQL calls. Because `gamm' already does some of the
## preliminary stuff that glmmPQL does, gammPQL can be simpler. It also
## deals with the possibility of the original data frame containing
## variables called `zz' `wts' or `invwt'
{ off <- model.offset(data)
if (is.null(off)) off <- 0
wts <- weights
if (is.null(wts)) wts <- rep(1, nrow(data))
wts.name <- new.name("wts",names(data)) ## avoid overwriting what's already in `data'
data[[wts.name]] <- wts
fit0 <- NULL ## keep checking tools happy
## initial fit (might be better replaced with `gam' call)
eval(parse(text=paste("fit0 <- glm(formula = fixed, family = family, data = data,",
"weights =",wts.name,",...)")))
w <- fit0$prior.weights
eta <- fit0$linear.predictors
zz <- eta + fit0$residuals - off
wz <- fit0$weights
fam <- family
## find non clashing name for pseudodata and insert in formula
zz.name <- new.name("zz",names(data))
eval(parse(text=paste("fixed[[2]] <- quote(",zz.name,")")))
data[[zz.name]] <- zz ## pseudodata to `data'
## find non-clashing name fro inverse weights, and make
## varFixed formula using it...
invwt.name <- new.name("invwt",names(data))
data[[invwt.name]] <- 1/wz
w.formula <- as.formula(paste("~",invwt.name,sep=""))
for (i in 1:niter) {
if (verbose) message("iteration ", i)
fit<-lme(fixed=fixed,random=random,data=data,correlation=correlation,
control=control,weights=varFixed(w.formula),method="ML",...)
etaold <- eta
eta <- fitted(fit) + off
if (sum((eta - etaold)^2) < 1e-06 * sum(eta^2)) break
mu <- fam$linkinv(eta)
mu.eta.val <- fam$mu.eta(eta)
## get pseudodata and insert in `data'
data[[zz.name]] <- eta + (fit0$y - mu)/mu.eta.val - off
wz <- w * mu.eta.val^2/fam$variance(mu)
data[[invwt.name]] <- 1/wz
} ## end i in 1:niter
fit
}
gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=list(),weights=NULL, gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=list(),weights=NULL,
subset=NULL,na.action,knots=NULL,control=nlme::lmeControl(niterEM=0,optimMethod="L-BFGS-B"), subset=NULL,na.action,knots=NULL,control=nlme::lmeControl(niterEM=0,optimMethod="L-BFGS-B"),
...@@ -896,7 +959,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis ...@@ -896,7 +959,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
# NOTE: need to fill out the gam object properly # NOTE: need to fill out the gam object properly
{ if (!require("nlme")) stop("gamm() requires package nlme to be installed") { if (!require("nlme")) stop("gamm() requires package nlme to be installed")
if (!require("MASS")) stop("gamm() requires package MASS to be installed") # if (!require("MASS")) stop("gamm() requires package MASS to be installed")
# check that random is a named list # check that random is a named list
if (!is.null(random)) if (!is.null(random))
{ if (is.list(random)) { if (is.list(random))
...@@ -1001,7 +1064,8 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis ...@@ -1001,7 +1064,8 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
if (lme.used) if (lme.used)
{ ## following construction is a work-around for problem in nlme 3-1.52 { ## following construction is a work-around for problem in nlme 3-1.52
eval(parse(text=paste("ret$lme<-lme(",deparse(fixed.formula), eval(parse(text=paste("ret$lme<-lme(",deparse(fixed.formula),
",random=rand,data=strip.offset(mf),correlation=correlation,control=control,weights=weights,method=method)" ",random=rand,data=strip.offset(mf),correlation=correlation,",
"control=control,weights=weights,method=method)"
,sep="" ))) ,sep="" )))
##ret$lme<-lme(fixed.formula,random=rand,data=mf,correlation=correlation,control=control) ##ret$lme<-lme(fixed.formula,random=rand,data=mf,correlation=correlation,control=control)
} else } else
...@@ -1009,8 +1073,9 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis ...@@ -1009,8 +1073,9 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
if (inherits(weights,"varFunc")) if (inherits(weights,"varFunc"))
stop("weights must be like glm weights for generalized case") stop("weights must be like glm weights for generalized case")
if (verbosePQL) cat("\n Maximum number of PQL iterations: ",niterPQL,"\n") if (verbosePQL) cat("\n Maximum number of PQL iterations: ",niterPQL,"\n")
eval(parse(text=paste("ret$lme<-glmmPQL(",deparse(fixed.formula), eval(parse(text=paste("ret$lme<-gammPQL(",deparse(fixed.formula),
",random=rand,data=strip.offset(mf),family=family,correlation=correlation,control=control,", ",random=rand,data=strip.offset(mf),family=family,",
"correlation=correlation,control=control,",
"weights=weights,niter=niterPQL,verbose=verbosePQL)",sep=""))) "weights=weights,niter=niterPQL,verbose=verbosePQL)",sep="")))
##ret$lme<-glmmPQL(fixed.formula,random=rand,data=mf,family=family,correlation=correlation, ##ret$lme<-glmmPQL(fixed.formula,random=rand,data=mf,family=family,correlation=correlation,
...@@ -1069,9 +1134,6 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis ...@@ -1069,9 +1134,6 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
V<-extract.lme.cov2(ret$lme,mf,n.sr+1) # the data covariance matrix, excluding smooths V<-extract.lme.cov2(ret$lme,mf,n.sr+1) # the data covariance matrix, excluding smooths
XVX <- formXtViX(V,G$Xf) XVX <- formXtViX(V,G$Xf)
# Cv<-chol(V)
# X<-G$Xf
# Z<-backsolve(Cv,X,transpose=TRUE)
S<-matrix(0,ncol(G$Xf),ncol(G$Xf)) # penalty matrix S<-matrix(0,ncol(G$Xf),ncol(G$Xf)) # penalty matrix
first <- G$nsdf+1 first <- G$nsdf+1
k <- 1 k <- 1
...@@ -1089,7 +1151,6 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis ...@@ -1089,7 +1151,6 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
first <- last + 1 first <- last + 1
} }
S<-S/ret$lme$sigma^2 # X'V^{-1}X divided by \sigma^2, so should S be S<-S/ret$lme$sigma^2 # X'V^{-1}X divided by \sigma^2, so should S be
# Z<-t(Z)%*%Z # X'V^{-1}X # this was XVX
Vb <- chol2inv(chol(XVX+S)) # covariance matrix - in constraint space Vb <- chol2inv(chol(XVX+S)) # covariance matrix - in constraint space
# need to project out of constraint space # need to project out of constraint space
Vp <- matrix(Vb[1:G$nsdf,],G$nsdf,ncol(Vb)) Vp <- matrix(Vb[1:G$nsdf,],G$nsdf,ncol(Vb))
......
...@@ -747,6 +747,7 @@ gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n ...@@ -747,6 +747,7 @@ gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
} else ## do performance iteration.... } else ## do performance iteration....
object<-gam.fit(G,family=G$family,control=control,gamma=gamma,fixedSteps=fixedSteps,...) object<-gam.fit(G,family=G$family,control=control,gamma=gamma,fixedSteps=fixedSteps,...)
# fill returned s.p. array with estimated and supplied terms # fill returned s.p. array with estimated and supplied terms
temp.sp<-object$sp temp.sp<-object$sp
object$sp<-G$all.sp object$sp<-G$all.sp
...@@ -772,6 +773,21 @@ gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n ...@@ -772,6 +773,21 @@ gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
temp.sp <- G$all.sp temp.sp <- G$all.sp
temp.sp[G$all.sp<0] <- object$sp # copy estimated sp's into whole vector temp.sp[G$all.sp<0] <- object$sp # copy estimated sp's into whole vector
object$sp <- temp.sp # correct object sp vector object$sp <- temp.sp # correct object sp vector
} else { ## check for all fixed sp case ...
if (!G$am && (method$gam=="perf.outer"||method$gam=="outer")) {
## need to fix up GCV/UBRE score
if (G$sig2>0) {criterion <- "UBRE";scale <- G$sig2} else {
criterion <- method$gcv;scale <- -1}
if (criterion=="UBRE") object$gcv.ubre <- object$deviance/G$n - scale +
2 * gamma * scale* sum(object$edf)/G$n else
if (criterion=="deviance") object$gcv.ubre <- G$n *
object$deviance/(G$n-sum(object$edf))^2 else
if (criterion=="GACV") {
P <- sum(object$weights*object$residuals^2)
tau <- sum(object$edf)
object$gcv.ubre <- object$deviance/G$n + 2 * gamma*tau * P / (G$n*(G$n-tau))
}
}
} }
## correct null deviance if there's an offset .... ## correct null deviance if there's an offset ....
......
1.3-28
* `gamm' modified to call a routine `gammPQL' in place of MASS::glmmPQL.
This avoids some duplication, and facilitates maintainance.
* Bug fix in `formXtViX' where matrix dimensions got dropped when
subsetting thereby messing up variance calculations for gamm fits in
which some group sizes were 1.
1.3-27 1.3-27
* Fix of nasty bug in large dataset handling with "tp" basis. Subsampling * Fix of nasty bug in large dataset handling with "tp" basis (introduced
code was re-seeding RNG instead of intended behaviour of saving RNG in 1.3-26). Subsampling code was re-seeding RNG instead of intended
state and restoring it. Fixed and tested. behaviour of saving RNG state and restoring it. Fixed and tested.
1.3-26 1.3-26
* modification to `gam' so that GCV/UBRE scores reported with all fixed
smoothing parameters are consistent with equivalent under s.p.
estimation.
* gam.fit3 modified to test for convergence of coefficients as well * gam.fit3 modified to test for convergence of coefficients as well
as penalized deviance, otherwise in extreme cases the derivative as penalized deviance, otherwise in extreme cases the derivative
iterations can diverge. iterations can diverge.
......
...@@ -47,7 +47,7 @@ one of the standard families. ...@@ -47,7 +47,7 @@ one of the standard families.
\seealso{ \seealso{
\code{\link{gam.fit3}}} \code{\link{gam.fit3}}}
}
\keyword{models} \keyword{regression}%-- one or more .. \keyword{models} \keyword{regression}%-- one or more ..
......
...@@ -199,9 +199,10 @@ Wang, Y. (1998) Mixed effects smoothing spline analysis of variance. J.R. Statis ...@@ -199,9 +199,10 @@ Wang, Y. (1998) Mixed effects smoothing spline analysis of variance. J.R. Statis
\section{WARNINGS }{ \section{WARNINGS }{
This version of \code{gamm} works best with \code{nlme} 3.1-62 and above and R \code{gamm} assumes that you know what you are doing! For example, unlike
2.2.0 and above. This is because it is designed to work with the optimizers \code{glmmPQL} from \code{MASS} it will return the complete \code{lme} object
used from these versions onwards, rather than the earlier default optimizers. from the working model at convergence of the PQL iteration, including the `log
likelihood', even though this is not the likelihood of the fitted GAMM.
The routine will be very slow and memory intensive if correlation structures The routine will be very slow and memory intensive if correlation structures
are used for the very large groups of data. e.g. attempting to run the are used for the very large groups of data. e.g. attempting to run the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment