Commit b568af2d authored by Dirk Eddelbuettel

Import Upstream version 1.7-19

parent b5dda8c1
Package: mgcv
Version: 1.7-18
Version: 1.7-19
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with GCV/AIC/REML smoothness
......@@ -14,6 +14,6 @@ Suggests: nlme (>= 3.1-64), splines, Matrix, parallel
LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
Packaged: 2012-06-12 08:47:36 UTC; sw283
Packaged: 2012-07-18 06:24:25 UTC; sw283
Repository: CRAN
Date/Publication: 2012-06-12 11:02:44
Date/Publication: 2012-07-20 05:00:31
d2d8a4e916f2f1161c5a07902e09fff2 *DESCRIPTION
882792073c8f32da732e6450ae7d1c12 *DESCRIPTION
50152051f123a389d421aa3130dce252 *NAMESPACE
3af17533752a84864d46354712372ea5 *R/bam.r
5a15ef36318076ec076a7aef2a1bd251 *R/fast-REML.r
b160632e8f38fa99470e2f8cba82a495 *R/gam.fit3.r
7b5db03a3a80878eb8007f8c0848583a *R/bam.r
e59ee1842b4546cb64ebe39e6a7f00ee *R/fast-REML.r
2073e50d30aacc57abac1db8b3133467 *R/gam.fit3.r
902657a0ee2dedc3fdfa501bf3b37c5b *R/gam.sim.r
ad57e83090b4633ee50041fd3571c016 *R/gamm.r
7aa8babce7db388dd66d75d72e3007d2 *R/mgcv.r
3527e64fa9d32baba5644c69c7d1eaf6 *R/gamm.r
41fc0ffd73b6f2ee4e18071134216299 *R/mgcv.r
958a9a13b359c6e6f23f340a05d98899 *R/plots.r
b662840dd06dc05bca6930ce0864d3b4 *R/smooth.r
77f6910e5b8b1aeb997006ef7ad20b5c *R/smooth.r
fb66d6c18398411a99ffcb788b854f13 *R/sparse.r
a5d19ababeb66e23a576ebdb17b14e2d *changeLog
f97ab579ec183613a62c338e34e31e61 *changeLog
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
40874e3ced720a596750f499ded8a60a *data/columb.rda
88d77139cc983317b6acd8c5f1252ab9 *gnugpl2.txt
......@@ -18,7 +18,7 @@ f693920e12f8a6f2b6cab93648628150 *index
5bf12ddc0dab9daae72271b96a15c539 *man/Predict.matrix.Rd
c51c9b8c9c73f81895176ded39b91394 *man/Predict.matrix.cr.smooth.Rd
612ab6354541ebe38a242634d73b66ba *man/Tweedie.Rd
68cba9c7ec24688d2df4bb5a29b22564 *man/anova.gam.Rd
0d24940b2e0a3e9fa7b3dc0224d049ab *man/anova.gam.Rd
585d6b6a2e7816df83d5f9193195a233 *man/bam.Rd
b385d6d5419d0d6aefe03af1a79d5c4e *man/bam.update.Rd
4e925cb579f4693d1b8ec2d5092c0b37 *man/cSplineDes.Rd
......@@ -28,7 +28,7 @@ c03748964ef606621418e428ae49b103 *man/columb.Rd
f764fb7cb9e63ff341a0075a3854ab5d *man/exclude.too.far.Rd
702afc89845d640c9359253e394bcadc *man/extract.lme.cov.Rd
44ad0563add1c560027d502ce41483f5 *man/fix.family.link.Rd
75373268c1203ee110e1eede633752aa *man/fixDependence.Rd
4d4eea9ad2b78e765a96b2a0065725c1 *man/fixDependence.Rd
9ac808f5a2a43cf97f24798c0922c9bf *man/formXtViX.Rd
bb099e6320a6c1bd79fe4bf59e0fde08 *man/formula.gam.Rd
6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd
......@@ -44,9 +44,9 @@ e969287d1a5c281faa7eb6cfce31a7c5 *man/gam.outer.Rd
76651917bd61fc6bc447bbb40b887236 *man/gam.side.Rd
78588cf8ed0af8eca70bba3bbed64dbe *man/gam.vcomp.Rd
a66a814cc4c6f806e824751fda519ae0 *man/gam2objective.Rd
4d5b3b1266edc31ce3b0e6be11ee9166 *man/gamObject.Rd
807a8e175d84ba3eac87f8ef2931f6e4 *man/gamObject.Rd
0ac5fb78c9db628ce554a8f68588058c *man/gamSim.Rd
6078c49c55f4e7ce20704e4fbe3bba8a *man/gamm.Rd
9c301060b3c35bc6ab83e6f38ce8d3b6 *man/gamm.Rd
1f5d723f2fa931297940e1a4a840d792 *man/get.var.Rd
a2ea1233d43fac89e0cacbc09a8d31e2 *man/in.out.Rd
2f222eeeb3d7bc42f93869bf8c2af58a *man/influence.gam.Rd
......@@ -56,9 +56,9 @@ aba56a0341ba9526a302e39d33aa9042 *man/interpret.gam.Rd
58e73ac26b93dc9d28bb27c8699e12cf *man/linear.functional.terms.Rd
5de18c3ad064a5bda4f9027d9455170a *man/logLik.gam.Rd
611f5f6acac9c5f40869c01cf7f75dd3 *man/ls.size.Rd
c4d7e46cead583732e391d680fecc572 *man/magic.Rd
396548e4a40652c54e80751acbfb2e2c *man/magic.Rd
496388445d8cde9b8e0c3917cbe7461d *man/magic.post.proc.Rd
d564d1c5b2f780844ff10125348f2e2c *man/mgcv-FAQ.Rd
9427464e63d70da15fc468961ef0c29b *man/mgcv-FAQ.Rd
41df245a5821b3964db4c74b1930c0fe *man/mgcv-package.Rd
18a9858b6f3ffde288b0bf9e1a5da2f6 *man/model.matrix.gam.Rd
bc9b89db7e7ff246749551c16f5f1f07 *man/mono.con.Rd
......@@ -95,14 +95,14 @@ f77ca1471881d2f93c74864d076c0a0e *man/rTweedie.Rd
d202c6718fb1138fdd99e6102250aedf *man/smooth.construct.re.smooth.spec.Rd
53d7986dd7a54c1edb13ce84dbfe34a2 *man/smooth.construct.sos.smooth.spec.Rd
8672633a1fad8df3cb1f53d7fa883620 *man/smooth.construct.tensor.smooth.spec.Rd
4b9bd43c3acbab6ab0159d59967e19db *man/smooth.construct.tp.smooth.spec.Rd
76ca359e3eda6e32398da0d6cdf9903b *man/smooth.construct.tp.smooth.spec.Rd
1de9c315702476fd405a85663bb32d1c *man/smooth.terms.Rd
0d12daea17e0b7aef8ab89b5f801adf1 *man/smoothCon.Rd
5ae47a140393009e3dba7557af175170 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
700699103b50f40d17d3824e35522c85 *man/step.gam.Rd
01a16d9badcb601166998d0893f75cfc *man/summary.gam.Rd
7f383eaaca246c8bf2d5b74d841f7f8a *man/t2.Rd
b8a08ccd4c9371ff2735b5aefd3058b2 *man/summary.gam.Rd
6fbcf448bc473a870fde0025db272c7a *man/t2.Rd
04076444b2c99e9287c080298f9dc1d7 *man/te.Rd
c3c23641875a293593fe4ef032b44aae *man/tensor.prod.model.matrix.Rd
fbd45cbb1931bdb5c0de044e22fdd028 *man/uniquecombs.Rd
......@@ -120,12 +120,12 @@ cd54024d76a9b53dc17ef26323fc053f *src/Makevars
94a2bcbb75cc60e8460e72ed154678c9 *src/gdi.c
49af97195accb65adc75620183d39a4c *src/general.h
6f301e977834b4743728346184ea11ba *src/init.c
7f9fcb495707a003817e78f4802ceeba *src/magic.c
1635205c9d5bb9a5713663775f47fd2a *src/magic.c
066af9db587e5fe6e5cc4ff8c09ae9c2 *src/mat.c
de0ae24ea5cb533640a3ab57e0383595 *src/matrix.c
0f8448f67d16668f9027084a2d9a1b52 *src/matrix.h
6a9f57b44d2aab43aa32b01ccb26bd6e *src/mgcv.c
c62652f45ad1cd3624a849005858723a *src/mgcv.h
9a7ab05c053c7e1cb03148863d0980cb *src/mgcv.h
fcbe85d667f8c7818d17509a0c3c5935 *src/misc.c
7e0ba698a21a01150fda519661ef9857 *src/qp.c
cd563899be5b09897d1bf36a7889caa0 *src/qp.h
......
......@@ -401,6 +401,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$F <- post$F
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
......@@ -429,6 +430,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale)
object$edf <- res$edf
object$edf1 <- res$edf1
object$F <- res$F
object$hat <- res$hat
object$Vp <- res$Vp
object$Ve <- res$Ve
......@@ -446,7 +448,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
if (any(mu < eps))
warning("fitted rates numerically 0 occurred")
}
object$R <- qrx$R
object$iter <- iter
object$wt <- wt
object$y <- G$y
......@@ -575,6 +577,7 @@ bgam.fit2 <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NU
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$F <- post$F
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
......@@ -614,6 +617,7 @@ bgam.fit2 <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NU
object$iter <- iter
object$wt <- w
object$R <- qrx$R
object$y <- G$y
rm(G);gc()
object
......@@ -915,7 +919,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =n,Mp=um$Mp)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale)
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,F=res$F,
gcv.ubre=fit$reml,hat=res$hat,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=res$Ve,Vp=res$Vp,scale.estimated = scale<=0,outer.info=fit$outer.info,
......@@ -961,6 +965,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$F <- post$F
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
......@@ -982,7 +987,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
object$yX.last <- yX.last
}
object$R <- qrx$R
object$gamma <- gamma;object$G <- G;object$qrx <- qrx ## to allow updating of the model
object$y <- mf[[gp$response]]
object$iter <- 1
......@@ -1100,6 +1105,9 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
idLinksBases=TRUE,scale.penalty=control$scalePenalty,
paraPen=paraPen)
## no advantage to "fREML" with no free smooths...
if (((!is.null(G$L)&&ncol(G$L) < 1)||(length(G$sp)==0))&&method=="fREML") method <- "REML"
G$var.summary <- var.summary
G$family <- family
G$terms<-terms;G$pterms<-pterms
......@@ -1209,8 +1217,8 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
object$terms <- G$terms
object$var.summary <- G$var.summary
object$weights <- object$prior.weights
if (is.null(object$wt)) object$weights <- object$prior.weights else
object$weights <- object$wt
object$xlevels <- G$xlevels
#object$y <- object$model[[gp$response]]
object$NA.action <- na.action ## version to use in bam.update
......@@ -1228,12 +1236,12 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
object$linear.predictors <- as.numeric(predict.bam(object,newdata=object$model,block.size=chunk.size,cluster=cluster))
object$fitted.values <- family$linkinv(object$linear.predictors)
object$residuals <- sqrt(family$dev.resids(object$y,object$fitted.values,object$weights)) *
object$residuals <- sqrt(family$dev.resids(object$y,object$fitted.values,object$prior.weights)) *
sign(object$y-object$fitted.values)
object$deviance <- sum(object$residuals^2)
object$aic <- family$aic(object$y,1,object$fitted.values,object$weights,object$deviance) +
object$aic <- family$aic(object$y,1,object$fitted.values,object$prior.weights,object$deviance) +
2*sum(object$edf)
object$null.deviance <- sum(family$dev.resids(object$y,mean(object$y),object$weights))
object$null.deviance <- sum(family$dev.resids(object$y,mean(object$y),object$prior.weights))
if (!is.null(object$full.sp)) {
if (length(object$full.sp)==length(object$sp)&&
all.equal(object$sp,object$full.sp)==TRUE) object$full.sp <- NULL
......@@ -1337,7 +1345,7 @@ bam.update <- function(b,data,chunk.size=10000) {
res <- Sl.postproc(b$Sl,fit,um$undrop,b$qrx$R,cov=TRUE,scale=scale)
object <- list(coefficients=res$beta,edf=res$edf,
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,F=res$F,
gcv.ubre=fit$reml,hat=res$hat,outer.info=list(iter=fit$iter,
message=fit$conv),optimizer="fast-REML",rank=ncol(um$X),
Ve=NULL,Vp=res$V,scale.estimated = scale<=0)
......@@ -1386,6 +1394,8 @@ bam.update <- function(b,data,chunk.size=10000) {
b$coefficients <- fit$b
b$edf <- post$edf
b$edf1 <- post$edf1
b$F <- post$F
b$full.sp <- fit$sp.full
b$gcv.ubre <- fit$score
b$hat <- post$hat
......@@ -1400,6 +1410,8 @@ bam.update <- function(b,data,chunk.size=10000) {
} else { ## REML or ML
b$coefficients <- object$coefficients
b$edf <- object$edf
b$edf1 <- object$edf1
b$F <- object$F
b$full.sp <- object$sp.full
b$gcv.ubre <- object$gcv.ubre
b$hat <- object$hat
......@@ -1414,15 +1426,17 @@ bam.update <- function(b,data,chunk.size=10000) {
}
}
b$R <- b$qrx$R
b$G$X <- NULL
b$linear.predictors <- as.numeric(predict.gam(b,newdata=b$model,block.size=chunk.size))
b$fitted.values <- b$linear.predictor ## strictly additive only!
b$residuals <- sqrt(b$family$dev.resids(b$y,b$fitted.values,b$weights)) *
b$residuals <- sqrt(b$family$dev.resids(b$y,b$fitted.values,b$prior.weights)) *
sign(b$y-b$fitted.values)
b$deviance <- sum(b$residuals^2)
b$aic <- b$family$aic(b$y,1,b$fitted.values,b$weights,b$deviance) + 2 * sum(b$edf)
b$null.deviance <- sum(b$family$dev.resids(b$y,mean(b$y),b$weights))
b$aic <- b$family$aic(b$y,1,b$fitted.values,b$prior.weights,b$deviance) + 2 * sum(b$edf)
b$null.deviance <- sum(b$family$dev.resids(b$y,mean(b$y),b$prior.weights))
names(b$coefficients) <- names(b$edf) <- cnames
b
} ## end of bam.update
......
......@@ -768,7 +768,8 @@ Sl.postproc <- function(Sl,fit,undrop,X0,cov=FALSE,scale = -1) {
## edf <- rowSums(PP*crossprod(X0)) ## diag(PP%*%(t(X0)%*%X0))
if (scale<=0) scale <- fit$rss/(fit$nobs - sum(edf))
Vp <- PP * scale ## cov matrix
return(list(beta=beta,Vp=Vp,Ve=F%*%Vp,edf=edf,edf1=edf1,hat=hat))
##bias <- as.numeric(beta-F%*%beta) ## estimate of smoothing bias in beta
return(list(beta=beta,Vp=Vp,Ve=F%*%Vp,edf=edf,edf1=edf1,hat=hat,F=F))
} else return(list(beta=beta))
}
......
......@@ -703,6 +703,7 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
gam.fit3.post.proc <- function(X,object) {
## get edf array and covariance matrices after a gam fit.
## X is original model matrix
Vb <- object$rV%*%t(object$rV)*object$scale ## Bayesian cov.
PKt <- object$rV%*%t(object$K)
F <- PKt%*%(sqrt(object$weights)*X)
......@@ -712,7 +713,13 @@ gam.fit3.post.proc <- function(X,object) {
## Ve <- PKt%*%t(PKt)*object$scale ## frequentist cov
Ve <- F%*%Vb ## not quite as stable as above, but quicker
hat <- rowSums(object$K*object$K)
list(Vb=Vb,Ve=Ve,edf=edf,edf1=edf1,hat=hat)
## get QR factor R of WX - more efficient to do this
## in gdi_1 really, but that means making QR of augmented
## a two stage thing, so not clear cut...
qrx <- qr(sqrt(object$weights)*X,LAPACK=TRUE)
R <- qr.R(qrx);R[,qrx$pivot] <- R
##bias = as.numeric(object$coefficients - F%*%object$coefficients)
list(Vb=Vb,Ve=Ve,edf=edf,edf1=edf1,hat=hat,F=F,R=R)
}
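## The un-pivoting idiom above (R[,qrx$pivot] <- R) can be checked in
## isolation. A standalone sketch, not part of the package sources:
##   set.seed(1)
##   X <- matrix(rnorm(40),10,4)                ## toy model matrix
##   w <- runif(10)                             ## toy working weights
##   qrx <- qr(sqrt(w)*X,LAPACK=TRUE)           ## pivoted QR of sqrt(W)X
##   R <- qr.R(qrx); R[,qrx$pivot] <- R         ## undo the column pivoting
##   range(crossprod(R)-crossprod(sqrt(w)*X))   ## ~ numerical zero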
......
......@@ -324,7 +324,7 @@ te <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,mp=TRUE,np=TRUE,xt=NUL
ret
} ## end of te
t2 <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,xt=NULL,id=NULL,sp=NULL,full=FALSE)
t2 <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,xt=NULL,id=NULL,sp=NULL,full=FALSE,ord=NULL)
# function for use in gam formulae to specify a type 2 tensor product smooth term.
# e.g. te(x0,x1,x2,k=c(5,4,4),bs=c("tp","cr","cr"),m=c(1,1,2),by=x3) specifies a rank 80 tensor
# product spline. The first basis is rank 5, t.p.r.s. basis penalty order 1, and the next 2 bases
......@@ -407,6 +407,15 @@ t2 <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,xt=NULL,id=NULL,sp=NULL,full=FA
margin[[i]]<- eval(parse(text=stxt)) # NOTE: fx and by not dealt with here!
j<-j1+1
}
# check ord argument
if (!is.null(ord)) {
if (sum(ord%in%0:n.bases)==0) {
ord <- NULL
warning("ord is wrong. reset to NULL.")
}
if (sum(ord<0)>0||sum(ord>n.bases)>0) warning("ord contains out of range orders (which will be ignored)")
}
# assemble term.label
full.call<-paste("t2(",term[1],sep="")
......@@ -422,7 +431,7 @@ t2 <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,xt=NULL,id=NULL,sp=NULL,full=FA
full <- as.logical(full)
if (is.na(full)) full <- FALSE
ret<-list(margin=margin,term=term,by=by.var,fx=fx,label=label,dim=dim,
id=id,sp=sp,full=full)
id=id,sp=sp,full=full,ord=ord)
class(ret) <- "t2.smooth.spec"
ret
} ## end of t2
......@@ -639,7 +648,7 @@ Predict.matrix.tensor.smooth<-function(object,data)
## Type 2 tensor product methods start here - separate identity penalties
#########################################################################
t2.model.matrix <- function(Xm,rank,full=TRUE) {
t2.model.matrix <- function(Xm,rank,full=TRUE,ord=NULL) {
## Xm is a list of marginal model matrices.
## The first rank[i] columns of Xm[[i]] are penalized,
## by a ridge penalty, the remainder are unpenalized.
......@@ -650,8 +659,11 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
## the construction. Otherwise there is an element of arbitrariness
## in the invariance, as it depends on scaling of the null space
## columns.
## ord is the list of term orders to include. NULL indicates all
## terms are to be retained.
Zi <- Xm[[1]][,1:rank[1],drop=FALSE] ## range space basis for first margin
X2 <- list(Zi)
order <- 1 ## record order of component (number of range space components)
lab2 <- "r" ## list of term labels "r" denotes range space
null.exists <- rank[1] < ncol(Xm[[1]]) ## does null exist for margin 1
no.null <- FALSE
......@@ -664,7 +676,7 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
}
X2[[2]] <- Xi ## working model matrix component list
lab2[2]<- "n" ## "n" is null space
order[2] <- 0
} else no.null <- TRUE ## tensor product will have *no* null space...
n.m <- length(Xm) ## number of margins
......@@ -680,6 +692,7 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
X1 <- X2
if (full) pen1 <- pen2
lab1 <- lab2 ## labels
order1 <- order
k <- 1
for (ii in 1:length(X1)) { ## form products with Zi
if (!full || pen1[ii]) { ## X1[[ii]] is penalized and treated as a whole
......@@ -688,12 +701,14 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
X2[[k]] <- A
if (full) pen2[k] <- TRUE
lab2[k] <- paste(lab1[ii],"r",sep="")
order[k] <- order1[ii] + 1
k <- k + 1
} else { ## X1[[ii]] is un-penalized, columns to be treated separately
cnx1 <- colnames(X1[[ii]])
for (j in 1:ncol(X1[[ii]])) {
X2[[k]] <- X1[[ii]][,j]*Zi
lab2[k] <- paste(cnx1[j],"r",sep="")
order[k] <- order1[ii] + 1
pen2[k] <- TRUE
k <- k + 1
}
......@@ -714,6 +729,7 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
}
if (full) colnames(A) <- cnx2
lab2[k] <- paste(lab1[ii],"n",sep="")
order[k] <- order1[ii]
X2[[k]] <- A;
if (full) pen2[k] <- FALSE ## if full, you only get to here when pen1[i] FALSE
k <- k + 1
......@@ -721,7 +737,8 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
cnxi <- colnames(Xi)
for (j in 1:ncol(Xi)) {
X2[[k]] <- X1[[ii]]*Xi[,j]
lab2[k] <- paste(lab1[ii],cnxi[j],sep="")
lab2[k] <- paste(lab1[ii],cnxi[j],sep="") ## null space labels => order unchanged
order[k] <- order1[ii]
pen2[k] <- TRUE
k <- k + 1
}
......@@ -729,10 +746,17 @@ t2.model.matrix <- function(Xm,rank,full=TRUE) {
}
} ## finished dealing with null space for this margin
} ## finished working through margins
rm(X1)
## X2 now contains a sequence of model matrices, all but the last
## should have an associated ridge penalty.
if (!is.null(ord)) { ## may need to drop some terms
ii <- order %in% ord ## terms to retain
X2 <- X2[ii]
lab2 <- lab2[ii]
if (sum(ord==0)==0) no.null <- TRUE ## null space dropped
}
xc <- unlist(lapply(X2,ncol)) ## number of columns of sub-matrix
X <- matrix(unlist(X2),n,sum(xc))
if (!no.null) {
......@@ -791,7 +815,7 @@ smooth.construct.t2.smooth.spec <- function(object,data,knots)
## Create the model matrix...
X <- t2.model.matrix(Xm,r,full=object$full)
X <- t2.model.matrix(Xm,r,full=object$full,ord=object$ord)
sub.cols <- attr(X,"sub.cols") ## size (cols) of penalized sub blocks
......@@ -864,7 +888,7 @@ Predict.matrix.t2.smooth <- function(object,data)
X[[i]]<-Predict.matrix(object$margin[[i]],dat)%*%object$P[[i]]
rank[i] <- object$margin[[i]]$rank
}
T <- t2.model.matrix(X,rank,full=object$full)
T <- t2.model.matrix(X,rank,full=object$full,ord=object$ord)
T
} ## end of Predict.matrix.t2.smooth
......@@ -1019,8 +1043,14 @@ smooth.construct.tp.smooth.spec<-function(object,data,knots)
}
} ## end of large data set handling
##if (object$bs.dim[1]<0) object$bs.dim <- 10*3^(object$dim-1) # auto-initialize basis dimension
object$p.order[is.na(object$p.order)] <- 0 ## auto-initialize
M<-null.space.dimension(object$dim,object$p.order)
M <- null.space.dimension(object$dim,object$p.order[1])
if (length(object$p.order)>1&&object$p.order[2]==0) object$drop.null <- M else
object$drop.null <- 0
def.k <- c(8,27,100) ## default penalty range space dimension for different dimensions
dd <- min(object$dim,length(def.k))
if (object$bs.dim[1]<0) object$bs.dim <- M+def.k[dd] ##10*3^(object$dim-1) # auto-initialize basis dimension
......@@ -1049,12 +1079,7 @@ smooth.construct.tp.smooth.spec<-function(object,data,knots)
{ object$S[[1]]<-matrix(oo$S,k,k) # penalty matrix
object$S[[1]]<-(object$S[[1]]+t(object$S[[1]]))/2 # ensure exact symmetry
if (!is.null(shrink)) # then add shrinkage term to penalty
{ ## pre- 1.5 code the identity term could dominate the small eigenvales
## and really mess up the penalty...
## norm <- mean(object$S[[1]]^2)^0.5
## object$S[[1]] <- object$S[[1]] + diag(k)*norm*abs(shrink)
## Modify the penalty by increasing the penalty on the
{ ## Modify the penalty by increasing the penalty on the
## unpenalized space from zero...
es <- eigen(object$S[[1]],symmetric=TRUE)
## now add a penalty on the penalty null space
......@@ -1073,7 +1098,24 @@ smooth.construct.tp.smooth.spec<-function(object,data,knots)
if (!is.null(shrink)) M <- 0 ## null space now rank zero
object$rank <- k - M # penalty rank
object$null.space.dim <- M
if (object$drop.null>0) {
ind <- 1:(k-M)
if (FALSE) { ## nat param version
np <- nat.param(object$X,object$S[[1]],rank=k-M,type=0)
object$P <- np$P
object$S[[1]] <- diag(np$D)
object$X <- np$X[,ind]
} else { ## original param
object$S[[1]] <- object$S[[1]][ind,ind]
object$X <- object$X[,ind]
object$cmX <- colMeans(object$X)
object$X <- sweep(object$X,2,object$cmX)
}
object$null.space.dim <- 0
object$df <- object$df - M
object$bs.dim <- object$bs.dim -M
object$C <- matrix(0,0,ncol(object$X)) # null constraint matrix
}
class(object) <- "tprs.smooth"
object
}
......@@ -1101,14 +1143,27 @@ Predict.matrix.tprs.smooth<-function(object,data)
by<-0;by.exists<-FALSE
## following used to be object$null.space.dim, but this is now *post constraint*
M <- null.space.dimension(object$dim,object$p.order)
M <- null.space.dimension(object$dim,object$p.order[1])
ind <- 1:object$bs.dim
if (is.null(object$drop.null)) object$drop.null <- 0 ## pre 1.7_19 compatibility
if (object$drop.null>0) object$bs.dim <- object$bs.dim + M
X<-matrix(0,n,object$bs.dim)
oo<-.C(C_predict_tprs,as.double(x),as.integer(object$dim),as.integer(n),as.integer(object$p.order),
oo<-.C(C_predict_tprs,as.double(x),as.integer(object$dim),as.integer(n),as.integer(object$p.order[1]),
as.integer(object$bs.dim),as.integer(M),as.double(object$Xu),
as.integer(nrow(object$Xu)),as.double(object$UZ),as.double(by),as.integer(by.exists),X=as.double(X))
X<-matrix(oo$X,n,object$bs.dim)
if (object$drop.null>0) {
if (FALSE) { ## nat param
X <- (X%*%object$P)[,ind] ## drop null space
} else { ## original
X <- X[,ind]
X <- sweep(X,2,object$cmX)
}
}
X
}
......@@ -1863,7 +1918,7 @@ smooth.construct.re.smooth.spec <- function(object,data,knots)
## be biased here)! The theoretical problems can result in very
## low power in practice.
object$fr.pval <- TRUE ## use full rank frequentist p-value
object$random <- TRUE ## treat as a random effect for p-value comp.
class(object)<-"random.effect" # Give object a class
......@@ -3051,6 +3106,7 @@ smoothCon <- function(object,data,knots,absorb.cons=FALSE,scale.penalty=TRUE,n=n
for (i in 1:length(sml)) {
sml[[i]]$S[[M+1]] <- Sf
sml[[i]]$rank[M+1] <- sum(ind)
sml[[i]]$null.space.dim <- 0
}
}
}
......
** denotes quite substantial/important changes
*** denotes really big changes
ISSUES:
* F needed in gam object?
* still not really happy with nesting constraints, especially under rank
deficiency.
1.7-19
** summary.gam and anova.gam now use an improved p-value computation for
   smooth terms with a zero-dimensional penalty null space (including
   random effects). The new scheme has been tested by full replication
   of the simulation study in Scheipl et al. (2008, CSDA) to compare it to the best
   method therein. In these tests it is at least as powerful as the best
   method given there, and usually indistinguishable, but it gives slightly
   too low p-values under the null when smoothing parameters are very poorly identified.
   Note that the new p-values cannot be computed from old fitted gam objects.
Thanks to Martijn Wieling for pointing out how bad the p-values for regular
smooths could be with random effects.
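   A minimal illustration of the sort of term this affects (a sketch only,
   using hypothetical simulated data from gamSim and a made-up grouping
   factor 'fac'):

     library(mgcv)
     set.seed(2)
     dat <- gamSim(1, n = 400)                         ## simulated test data
     dat$fac <- factor(sample(1:20, 400, replace = TRUE))
     dat$y <- dat$y + rnorm(20)[dat$fac]               ## random intercepts by level
     b <- gam(y ~ s(x1) + s(fac, bs = "re"), data = dat, method = "REML")
     summary(b)                 ## p-value for s(fac) uses the new computation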
* t2 terms now take an argument `ord' that allows orders of interaction to
be selected.
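   For example (a sketch with simulated data; variable names are illustrative):
   for a two-margin t2 term the component orders run from 0 to 2, so
   ord = c(1,2) retains everything except the order-0, fully unpenalized
   component.

     library(mgcv)
     set.seed(3)
     dat <- gamSim(1, n = 400)
     b <- gam(y ~ t2(x1, x2, ord = c(1, 2)), data = dat, method = "REML")
     summary(b)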
* "tp" smooths can now drop the null space from their construction via
  a vector m argument, to allow a smooth to be tested against the
  polynomials in its null space.
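   For example (sketch, simulated data as above): m = c(2, 0) keeps the usual
   second-order penalty but drops the null space, so the null-space polynomial
   can be entered explicitly as parametric terms and tested in the usual way.

     library(mgcv)
     set.seed(4)
     dat <- gamSim(1, n = 400)
     ## s(x2) no longer spans 1 and x2, so the linear term enters parametrically
     b <- gam(y ~ x2 + s(x2, bs = "tp", m = c(2, 0)), data = dat, method = "REML")
     summary(b)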
* Fix of vicious little bug in gamm tensor product handling that could have
a te term pick up the wrong model matrix and fail.
* bam now resets method="fREML" to "REML" if there are no free smoothing
parameters, since there is no advantage to the "fREML" optimizer in this
case, and it assumes there is at least one free smoothing parameter.
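  For example (sketch): a model whose only smooth has fixed degrees of freedom
  has no free smoothing parameters, so the fit below is actually carried out by
  "REML".

    library(mgcv)
    set.seed(5)
    dat <- gamSim(1, n = 1000)
    b <- bam(y ~ s(x1, fx = TRUE) + x2, data = dat, method = "fREML")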
* print.gam modified to print effective degrees of freedom more prettily.
* testStat bug fix. qr was called with default arguments, which include
  tol=1e-7...
* bam now correctly returns the fitting weights (rather than the prior
  weights) in the weights field.
1.7-18
* Embarrassingly, the adjusted r^2 computation in summary.gam was wrong
......
......@@ -5,7 +5,9 @@
\title{Approximate hypothesis tests related to GAM fits}
\description{ Performs hypothesis tests relating to one or more fitted
\code{gam} objects. For a single fitted \code{gam} object, Wald tests of
the significance of each parametric and smooth term are performed. Otherwise
the significance of each parametric and smooth term are performed, so interpretation
is analogous to \code{\link{drop1}} rather than \code{anova.lm} (i.e. it's like type III ANOVA,
rather than a sequential type I ANOVA). Otherwise
the fitted models are compared using an analysis of deviance table: this latter approach
the significance of each parametric and smooth term are performed. Otherwise
to zero. See details.
......@@ -42,17 +44,21 @@ can be greatly overstated: i.e. p-values are often substantially too low. This c
Note also that in the multi-model call to \code{anova.gam}, it is quite possible for a model with more terms to end up with lower effective degrees of freedom, but better fit, than the notionally null model with fewer terms. In such cases it is very rare that it makes sense to perform any sort of test, since there is then no basis on which to accept the notional null model.
If only one model is provided then the significance of each model term
is assessed using Wald tests: see \code{\link{summary.gam}} for details. The p-values
provided here are better justified than in the multi model case, and have close to the
correct distribution under the null for smooths with a non-zero dimensional null space (i.e. terms that can-not be penalized to zero). ML or REML smoothing parameter selection leads to the best results in simulations as they tend to avoid occasional severe undersmoothing. In the single model case \code{print.anova.gam} is used as the
printing method.
is assessed using Wald tests, conditional on the smoothing parameter estimates: see \code{\link{summary.gam}}
and Wood (in press) for details. The p-values provided here are better justified than in the multi-model case, and have close to the
correct distribution under the null, unless smoothing parameters are poorly identified. ML or REML smoothing parameter selection leads to
the best results in simulations as it tends to avoid occasional severe undersmoothing. In replication of the full simulation study of Scheipl et al.
(2008) the tests give power almost indistinguishable from that of the method recommended there, but slightly too low p-values under the null in their
section 3.1.8 test for a smooth interaction (the Scheipl et al. recommendation is not used directly, because it only applies in the Gaussian case,
and requires model refits, but it is available in package \code{RLRsim}).
In the single model case \code{print.anova.gam} is used as the printing method.
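A minimal sketch of this single-model use (illustrative only, simulated data):

  library(mgcv)
  set.seed(6)
  dat <- gamSim(1, n = 400)
  b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML")
  anova(b)   ## term-wise Wald tests; interpret like drop1(), not sequentially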
By default the p-values for parametric model terms are also based on Wald tests using the Bayesian
covariance matrix for the coefficients. This is appropriate when there are "re" terms present, and is
otherwise rather similar to the results using the frequentist covariance matrix (\code{freq=TRUE}), since
the parametric terms themselves are usually unpenalized. Default P-values for parametric terms that are
penalized using the \code{paraPen} argument will not be good. However if such terms represent conventional
random effects with full rank penalties, then setting \code{freq=TRUE} is appropriate.
penalized using the \code{paraPen} argument will not be good.
}
......@@ -66,6 +72,13 @@ which is in fact an object returned from \code{\link{summary.gam}}.
}
\references{
Scheipl, F., Greven, S. and Kuchenhoff, H. (2008) Size and power of tests for a zero random effect variance or polynomial
regression in additive and linear mixed models. Comp. Statist. Data Anal. 52, 3283-3299.

Wood, S.N. (2012) On p-values for smooth components of an extended generalized additive model. Biometrika (in press).
}
\author{ Simon N. Wood \email{simon.wood@r-project.org} with substantial
improvements by Henric Nilsson.}
......@@ -75,6 +88,8 @@ p values from anova(a,b) are unreliable, and usually much too low.
Default P-values will usually be wrong for parametric terms penalized using `paraPen': use freq=TRUE
to obtain better p-values when the penalties are full rank and represent conventional random effects.
For a single model, interpretation is similar to drop1, not anova.lm.
}
\seealso{ \code{\link{gam}}, \code{\link{predict.gam}},
......
......@@ -7,7 +7,7 @@ dependent on columns of a matrix \code{X1}. Primarily of use in setting up
identifiability constraints for nested GAMs.
}
\usage{
fixDependence(X1,X2,tol=.Machine$double.eps^.5,rank.def=0)
fixDependence(X1,X2,tol=.Machine$double.eps^.5,rank.def=0,strict=FALSE)
}
%- maybe also `usage' for other objects documented here.
\arguments{
......@@ -18,13 +18,17 @@ fixDependence(X1,X2,tol=.Machine$double.eps^.5,rank.def=0)
\item{rank.def}{If the degree of rank deficiency in \code{X2}, given \code{X1},
is known, then it can be supplied here, and \code{tol} is then ignored.
Unused unless positive and not greater than the number of columns in \code{X2}.}
\item{strict}{if \code{TRUE} then only columns individually dependent on \code{X1} are detected;
if \code{FALSE} then enough columns to make the reduced \code{X2} full rank and
independent of \code{X1} are detected.}
}
\details{ The algorithm uses a simple approach based on QR decomposition: see
Wood (2006, section 4.10.2) for details.
}
\value{ An vector of the columns of \code{X2} which are linearly dependent on
columns of \code{X1}. \code{NULL} if the two matrices are independent.
\value{ A vector of the columns of \code{X2} which are linearly dependent on
columns of \code{X1} (or which need to be deleted to achieve independence and full rank
if \code{strict==FALSE}). \code{NULL} if the two matrices are independent.
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
......@@ -43,7 +47,7 @@ X2<-matrix(runif(n*c2),n,c2)
X2[,3]<-X1[,2]+X2[,4]*.1
X2[,5]<-X1[,1]*.2+X1[,2]*.04
fixDependence(X1,X2)
fixDependence(X1,X2,rank.def=2)
fixDependence(X1,X2,strict=TRUE)
}
\keyword{models} \keyword{regression}%-- one or more ..
......
......@@ -67,7 +67,7 @@ from \code{min.sp} argument to \code{gam}). May be larger than \code{sp} if some
smoothing parameters, and/or some smoothing parameter values were supplied in the \code{sp} argument
of \code{\link{gam}}.}
\item{F}{Degrees of freedom matrix. This may be removed at some point, and should probably not be used.}
\item{gcv.ubre}{The minimized smoothing parameter selection score: GCV, UBRE(AIC), GACV, negative log marginal
likelihood or negative log restricted likelihood.}
......@@ -125,6 +125,9 @@ information on the parametric penalties. \code{NULL} otherwise.}
\item{pterms}{\code{terms} object for strictly parametric part of model.}
\item{R}{Factor R from QR decomposition of weighted model matrix, unpivoted to be in
same column order as model matrix (so need not be upper triangular).}
\item{rank}{apparent rank of fitted model.}
\item{reml.scale}{The scale (RE)ML scale parameter estimate, if (P-)(RE)ML used
......@@ -170,7 +173,6 @@ posterior covariance matrix that results from adopting a particular Bayesian
model of the smoothing process. Particularly useful for creating
credible/confidence intervals.}
\item{weights}{final weights used in IRLS iteration.}
\item{y}{response data.}
......
......@@ -319,6 +319,7 @@ b <- gamm(y~s(x0,bs="cr")+s(x1,bs="cr")+s(x2,bs="cr")+
s(x3,bs="cr"),data=dat,random=list(fa=~1,fb=~1),
correlation=corAR1())
plot(b$gam,pages=1)
summary(b$gam)
vis.gam(b$gam)
## and a "spatial" example...
library(nlme);set.seed(1);n <- 200
......
......@@ -186,7 +186,9 @@ by failing to decrease the score along the search direction.}
\item{iter}{is the number of Newton/Steepest descent iterations taken.}
\item{score.calls}{is the number of times that the GCV/UBRE score had to be evaluated.}