Commit d3eb4372 authored by Dirk Eddelbuettel

Import Upstream version 1.7-12

parent d21e4993
Package: mgcv
Version: 1.7-11
Version: 1.7-12
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
@@ -8,11 +8,12 @@ Description: Routines for GAMs and other generalized ridge regression
or UBRE/AIC. Also GAMMs by REML or PQL. Includes a gam()
function.
Priority: recommended
Depends: R (>= 2.3.0)
Depends: R (>= 2.14.0)
Imports: graphics, stats, nlme, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix, parallel
LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
Packaged: 2011-11-14 07:17:20 UTC; simon
Packaged: 2011-12-11 21:23:23 UTC; sw283
Repository: CRAN
Date/Publication: 2011-11-14 09:58:12
Date/Publication: 2011-12-12 12:21:51
dc584e91cdb041efbad55880e8e336f1 *DESCRIPTION
af5be24b4a8a4b948f42d61631c198fc *DESCRIPTION
abc5e760b0f5dd22c81d7614c0b304f0 *NAMESPACE
4553efb64e4def029f7c5bdfc0168936 *R/bam.r
166f5e589a85cd72305f281e0bc08626 *R/gam.fit3.r
ecfb144fb5214dde68dffac22f219a1f *R/bam.r
b160632e8f38fa99470e2f8cba82a495 *R/gam.fit3.r
902657a0ee2dedc3fdfa501bf3b37c5b *R/gam.sim.r
2abf5b43692bc0df458a2ddc1b0c6dca *R/gamm.r
0e48e7ed141719f4d9e6476e54efaa79 *R/mgcv.r
e137c06cabb48551c18cf0cc3512d297 *R/gamm.r
da8dd27eb3b4b59a46ef58c789a38e56 *R/mgcv.r
bf70158e37e33ea1136efdeab97569f8 *R/plots.r
ceed2e84bd4a17f7da324134ebe1b6b4 *R/smooth.r
8e2c7eddd2a206d05d708d4496f7eb31 *R/smooth.r
fe9745f610246ee1f31eb915ca0d76a9 *R/sparse.r
b3f865a351f7e840e79dfaaaa81d5378 *changeLog
c5051a5b85fa7e9fa8ffdf9d52339408 *changeLog
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
40874e3ced720a596750f499ded8a60a *data/columb.rda
88d77139cc983317b6acd8c5f1252ab9 *gnugpl2.txt
@@ -17,8 +17,8 @@ f693920e12f8a6f2b6cab93648628150 *index
5bf12ddc0dab9daae72271b96a15c539 *man/Predict.matrix.Rd
c51c9b8c9c73f81895176ded39b91394 *man/Predict.matrix.cr.smooth.Rd
612ab6354541ebe38a242634d73b66ba *man/Tweedie.Rd
4891a913dbfaa2dc8f41fef6c8590f21 *man/anova.gam.Rd
c06df4877abdaf07e67fc32c85fd1a87 *man/bam.Rd
6d711de718a09e1e1ae2a6967abade33 *man/anova.gam.Rd
03fadfd98a6f40d6d517ec895859cff3 *man/bam.Rd
b385d6d5419d0d6aefe03af1a79d5c4e *man/bam.update.Rd
4e925cb579f4693d1b8ec2d5092c0b37 *man/cSplineDes.Rd
9753a8051d9b495d9855537f3f26f491 *man/choose.k.Rd
@@ -31,7 +31,7 @@ f764fb7cb9e63ff341a0075a3854ab5d *man/exclude.too.far.Rd
9ac808f5a2a43cf97f24798c0922c9bf *man/formXtViX.Rd
34308f4ada8e2aca9981a17794dac30b *man/formula.gam.Rd
6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd
3869522b0596acf818c7a9a170c94d4e *man/gam.Rd
c7f0549fe7b9da0624417e33ed92344d *man/gam.Rd
aeb7ec80d75244bc4f2f2fd796f86efd *man/gam.check.Rd
847599e287ecf79fbb7be2cb06d72742 *man/gam.control.Rd
fd98327327ba74bb1a61a6519f12e936 *man/gam.convergence.Rd
@@ -45,7 +45,7 @@ dd35a8a851460c2d2106c03d544c8241 *man/gam.models.Rd
278e0b3aa7baa44dfb96e235ceb07f4c *man/gam2objective.Rd
4d5b3b1266edc31ce3b0e6be11ee9166 *man/gamObject.Rd
0ac5fb78c9db628ce554a8f68588058c *man/gamSim.Rd
d31bdc3f9ce07fb1caefe5a609fb3bfb *man/gamm.Rd
6078c49c55f4e7ce20704e4fbe3bba8a *man/gamm.Rd
1f5d723f2fa931297940e1a4a840d792 *man/get.var.Rd
a2ea1233d43fac89e0cacbc09a8d31e2 *man/in.out.Rd
2f222eeeb3d7bc42f93869bf8c2af58a *man/influence.gam.Rd
@@ -88,20 +88,20 @@ f77ca1471881d2f93c74864d076c0a0e *man/rTweedie.Rd
47f5ed6af7c784f26e0fe8e027ee1d90 *man/smooth.construct.Rd
10a456bc3d7151f60be24a7e3bba3d30 *man/smooth.construct.ad.smooth.spec.Rd
9f05449c0114748608f99693ec25cf3f *man/smooth.construct.cr.smooth.spec.Rd
995e79cf7a23027c60d113481083c1cd *man/smooth.construct.ds.smooth.spec.Rd
131c3f2131138ba5d6f6bcde4be5ac31 *man/smooth.construct.ds.smooth.spec.Rd
1bb6748d4d2934e48f0572bc5114ffcb *man/smooth.construct.fs.smooth.spec.Rd
e0c041d7ec47290741e82fd71c408995 *man/smooth.construct.mrf.smooth.spec.Rd
78688702b6396f24692b74c554483428 *man/smooth.construct.ps.smooth.spec.Rd
0b134a171b9c5f8194787bf243f9c962 *man/smooth.construct.re.smooth.spec.Rd
53d7986dd7a54c1edb13ce84dbfe34a2 *man/smooth.construct.sos.smooth.spec.Rd
8672633a1fad8df3cb1f53d7fa883620 *man/smooth.construct.tensor.smooth.spec.Rd
a84c09609eedb6902b3ef7c8885010dd *man/smooth.construct.tp.smooth.spec.Rd
4b9bd43c3acbab6ab0159d59967e19db *man/smooth.construct.tp.smooth.spec.Rd
1de9c315702476fd405a85663bb32d1c *man/smooth.terms.Rd
6aa3bcbd3198d2bbc3b9ca12c9c9cd7e *man/smoothCon.Rd
5ae47a140393009e3dba7557af175170 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
a17981f0fa2a6a50e637c98c672bfc45 *man/step.gam.Rd
b5fc9c991f1955b2ca25ed730aee4fd3 *man/summary.gam.Rd
dd54c87fb87c284d3894410f50550047 *man/summary.gam.Rd
22b571cbc0bd1e31f195ad927434c27e *man/t2.Rd
04076444b2c99e9287c080298f9dc1d7 *man/te.Rd
c3c23641875a293593fe4ef032b44aae *man/tensor.prod.model.matrix.Rd
@@ -119,18 +119,18 @@ becbe3e1f1588f7292a74a97ef07a9ae *po/R-de.po
cd54024d76a9b53dc17ef26323fc053f *src/Makevars
a25e39145f032e8e37433651bba92ddf *src/gcv.c
2798411be2cb3748b8bd739f2d2016ee *src/gcv.h
d1a84be43974752e429c66bb7bdf7f0e *src/gdi.c
d40012dcda1a10ee535a9b3de9b46c19 *src/gdi.c
49af97195accb65adc75620183d39a4c *src/general.h
da280ee5538a828afde0a4f6c7b8328a *src/init.c
b42f1336e4bfd6d909d40573d09412af *src/magic.c
edf95099e3559dfa083270cd1f4bd4b3 *src/mat.c
770a885a5bb6c7a37b3d08709325949b *src/matrix.c
8b37eb0db498a3867dc83364dc65f146 *src/magic.c
ebd3647ff460932bada8f49159f40ac8 *src/mat.c
d21847ac9a1f91ee9446c70bd93a490a *src/matrix.c
54ce9309b17024ca524e279612a869d6 *src/matrix.h
66946ed100818db02c6775c868b2069c *src/mgcv.c
9c7041e09719bb67e0cd9717096dde82 *src/mgcv.h
08c94a2af4cd047ecd79871ecbafe33a *src/mgcv.c
99204b3b20c2e475d9e14022e0144804 *src/mgcv.h
2a1c4f1c10510a4338e5cc34defa65f6 *src/misc.c
7e0ba698a21a01150fda519661ef9857 *src/qp.c
cd563899be5b09897d1bf36a7889caa0 *src/qp.h
e9cab4a461eb8e086a0e4834cbf16f30 *src/sparse-smooth.c
319f8d9fd086f39b529ee966b18006b0 *src/tprs.c
6279fd50c8e27f485c8707a20aebbd3d *src/tprs.h
3a251ecac78b25c315de459cd2ba0b04 *src/tprs.c
d0531330f4c1209a1cdd7a75b1854724 *src/tprs.h
@@ -1079,6 +1079,9 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
d <- eh$values;U <- eh$vectors
ind <- d < 0
d[ind] <- -d[ind] ## see Gill Murray and Wright p107/8
low.d <- max(d)*.Machine$double.eps^.7
ind <- d < low.d
d[ind] <- low.d
d <- 1/d
Nstep <- 0 * grad
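A minimal standalone sketch (helper name invented, not mgcv code) of what the three lines added above achieve: the symmetrized Hessian's eigenvalues are first made positive following Gill, Murray and Wright, then floored at a small multiple of the largest one, so that 1/d cannot blow up on near-zero eigenvalues (see also the 1.7-12 changelog note below on tuned BLAS/LAPACK behaviour).

stab.inv.hessian <- function(H) {
  eh <- eigen((H + t(H))/2, symmetric = TRUE) ## symmetrize, then decompose
  d <- eh$values
  d[d < 0] <- -d[d < 0]                  ## flip negative eigenvalues
  low.d <- max(d) * .Machine$double.eps^.7
  d[d < low.d] <- low.d                  ## floor the near-zero ones
  eh$vectors %*% (t(eh$vectors) / d)     ## U diag(1/d) U'
}
stab.inv.hessian(matrix(c(2, 1, 1, -1e-30), 2, 2)) ## indefinite, inverts safely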
@@ -1193,7 +1196,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
hess <- diag(rho$rho1,nr,nr)%*%hess%*%diag(rho$rho1,nr,nr) + diag(rho$rho2*grad)
grad <- rho$rho1*grad
}
score1 <- score - abs(score) - 1 ## make damn sure that score1 < score
} # end of if (score1<= score )
ii <- ii + 1
} # end of step halving
......
@@ -1290,7 +1290,7 @@ gammPQL <- function (fixed, random, family, data, correlation, weights,
data[[zz.name]] <- zz ## pseudodata to `data'
## find non-clashing name fro inverse weights, and make
## find non-clashing name for inverse weights, and make
## varFixed formula using it...
invwt.name <- new.name("invwt",names(data))
@@ -1317,6 +1317,8 @@ gammPQL <- function (fixed, random, family, data, correlation, weights,
data[[invwt.name]] <- 1/wz
} ## end i in 1:niter
if (!converged) warning("gamm not converged, try increasing niterPQL")
fit$y <- fit0$y
fit$w <- w ## prior weights
fit
}
@@ -1344,26 +1346,40 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
random.vars<-c(unlist(lapply(random, function(x) all.vars(formula(x)))),r.names)
} else random.vars<-NULL
if (!is.null(correlation))
{ cor.for<-attr(correlation,"formula")
if (!is.null(cor.for))
cor.vars<-all.vars(cor.for)
if (!is.null(correlation)) {
cor.for<-attr(correlation,"formula")
if (!is.null(cor.for)) cor.vars<-all.vars(cor.for)
} else cor.vars<-NULL
## now establish whether weights is varFunc or not...
wisvf <- try(inherits(weights,"varFunc"),silent=TRUE)
if (inherits(wisvf,"try-error")) wisvf <- FALSE
if (wisvf) { ## collect its variables
vf.for<-attr(weights,"formula")
if (!is.null(vf.for)) vf.vars<-all.vars(vf.for)
} else vf.vars <- NULL
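The try() wrapper above matters because `weights' may be the unquoted name of a column of `data', which need not exist in the calling environment, so merely forcing the argument can throw; a failed evaluation is then safely read as "not a varFunc", and the weights are recovered from the model frame instead (the gmf[["(weights)"]] line below). A hypothetical mini-example of the guard pattern (function and object names invented):

is.varfunc.arg <- function(w) {
  ok <- try(inherits(w, "varFunc"), silent = TRUE) ## forcing w may error
  if (inherits(ok, "try-error")) ok <- FALSE       ## error => not a varFunc
  ok
}
is.varfunc.arg(nlme::varFixed(~x)) ## TRUE: a genuine varFunc object
is.varfunc.arg(no.such.column)     ## FALSE: lazy evaluation fails, try() catches it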
# create model frame.....
gp<-interpret.gam(formula) # interpret the formula
##cl<-match.call() # call needed in gamm object for update to work
mf <- match.call(expand.dots=FALSE)
mf$formula <- gp$fake.formula
mf$correlation <- mf$random <- mf$family <- mf$control <- mf$scale <- mf$knots <- mf$sp <- mf$weights <-
mf$min.sp <- mf$H <- mf$gamma <- mf$fit <- mf$niterPQL <- mf$verbosePQL <- mf$G <- mf$method <- mf$... <- NULL
if (wisvf) {
mf$correlation <- mf$random <- mf$family <- mf$control <- mf$scale <- mf$knots <- mf$sp <- mf$weights <-
mf$min.sp <- mf$H <- mf$gamma <- mf$fit <- mf$niterPQL <- mf$verbosePQL <- mf$G <- mf$method <- mf$... <- NULL
} else {
mf$correlation <- mf$random <- mf$family <- mf$control <- mf$scale <- mf$knots <- mf$sp <-
mf$min.sp <- mf$H <- mf$gamma <- mf$fit <- mf$niterPQL <- mf$verbosePQL <- mf$G <- mf$method <- mf$... <- NULL
}
mf$drop.unused.levels <- TRUE
mf[[1]] <- as.name("model.frame")
pmf <- mf
gmf <- eval(mf, parent.frame()) # the model frame now contains all the data, for the gam part only
gam.terms <- attr(gmf,"terms") # terms object for `gam' part of fit -- need this for prediction to work properly
allvars <- c(cor.vars,random.vars)
if (!wisvf) weights <- gmf[["(weights)"]]
allvars <- c(cor.vars,random.vars,vf.vars)
if (length(allvars)) {
mf$formula <- as.formula(paste(paste(deparse(gp$fake.formula,backtick=TRUE),collapse=""),
"+",paste(allvars,collapse="+")))
@@ -1451,7 +1467,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
if (family$family=="gaussian"&&family$link=="identity"&&
length(offset.name)==0) lme.used <- TRUE else lme.used <- FALSE
if (lme.used&&!is.null(weights)&&!inherits(weights,"varFunc")) lme.used <- FALSE
if (lme.used&&!is.null(weights)&&!wisvf) lme.used <- FALSE
if (lme.used)
{ ## following construction is a work-around for a problem in nlme 3.1-52
@@ -1462,14 +1478,13 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
##ret$lme<-lme(fixed.formula,random=rand,data=mf,correlation=correlation,control=control)
} else
{ ## Again, construction is a work-around for nlme 3.1-52
if (inherits(weights,"varFunc"))
stop("weights must be like glm weights for generalized case")
if (wisvf) stop("weights must be like glm weights for generalized case")
if (verbosePQL) cat("\n Maximum number of PQL iterations: ",niterPQL,"\n")
eval(parse(text=paste("ret$lme<-gammPQL(",deparse(fixed.formula),
",random=rand,data=strip.offset(mf),family=family,",
"correlation=correlation,control=control,",
"weights=weights,niter=niterPQL,verbose=verbosePQL)",sep="")))
G$y <- ret$lme$y ## makes sure that binomial response is returned as a vector!
##ret$lme<-glmmPQL(fixed.formula,random=rand,data=mf,family=family,correlation=correlation,
## control=control,niter=niterPQL,verbose=verbosePQL)
}
@@ -1668,11 +1683,9 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
names(object$coefficients) <- term.names # note - won't work on matrices!!
names(object$edf) <- term.names
if (is.null(weights))
object$prior.weights <- object$y*0+1
else if (inherits(weights,"varFunc"))
object$prior.weights <- varWeights.dfo(ret$lme,mf)^2
else object$prior.weights <- weights
if (is.null(weights)) object$prior.weights <- object$y*0+1
else if (wisvf) object$prior.weights <- varWeights.dfo(ret$lme,mf)^2
else object$prior.weights <- ret$lme$w
object$weights<-object$prior.weights
......
## R routines for the package mgcv (c) Simon Wood 2000-2010
## R routines for the package mgcv (c) Simon Wood 2000-2011
## With contributions from Henric Nilsson
@@ -2277,9 +2277,9 @@ predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
if (n.pterms) # work through parametric part
for (i in 1:n.pterms)
{ ii <- ind[object$assign==i]
fit[start:stop,i] <- as.matrix(X[,ii])%*%object$coefficients[ii]
fit[start:stop,i] <- X[,ii,drop=FALSE]%*%object$coefficients[ii]
if (se.fit) se[start:stop,i]<-
sqrt(rowSums((as.matrix(X[,ii])%*%object$Vp[ii,ii])*as.matrix(X[,ii])))
sqrt(rowSums((X[,ii,drop=FALSE]%*%object$Vp[ii,ii])*X[,ii,drop=FALSE]))
}
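The drop=FALSE change above guards the single-row prediction case: one-row matrix indexing collapses to a plain vector, and as.matrix() then rebuilds it with the wrong orientation. A small sketch of the pitfall (assumed here to be the rationale for the fix):

X <- matrix(1:6, nrow = 1)      ## one prediction row, six coefficients
ii <- 2:4                       ## columns of one parametric term
dim(as.matrix(X[, ii]))         ## 3 x 1: the row silently became a column
dim(X[, ii, drop = FALSE])      ## 1 x 3: what %*% coefficients expects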
if (n.smooth&&!para.only)
@@ -2496,16 +2496,42 @@ eigXVX <- function(X,V,rank=NULL,tol=.Machine$double.eps^.5) {
list(values=ed$values[ind],vectors=vec[,ind],rank=rank)
}
pinvXVX <- function(X,V,rank=NULL) {
smoothTest <- function(b,X,V,z) {
## Forms Cox, Koh, etc type test statistic, and
## obtains null distribution by simulation...
## if b are coefs f=Xb, cov(b) = V. z is a vector of
## i.i.d. N(0,1) deviates
qrx <- qr(X)
R <- qr.R(qrx)
V <- R%*%V[qrx$pivot,qrx$pivot]%*%t(R)
V <- (V + t(V))/2
ed <- eigen(V,symmetric=TRUE)
f <- t(ed$vectors)%*%R%*%b
t <- sum(f^2)
k <- ncol(X)
n.rep <- floor(length(z)/k)
lambda <- as.numeric(ed$values)
T <- colSums(lambda*matrix(z[1:(n.rep*k)]^2,k,n.rep))
pval <- sum(T>=t)
#if (pval==0) pval <- .5
pval <- pval/n.rep
list(stat=t,pval=pval)
}
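A self-contained toy call of smoothTest as defined above, with invented inputs standing in for fitted-model quantities (in real use b would be a smooth's coefficients and V their Bayesian covariance):

set.seed(1)
n <- 100; k <- 5
X <- matrix(rnorm(n * k), n, k)             ## toy model matrix
V <- crossprod(matrix(rnorm(k * k), k)) / n ## some positive definite covariance
b <- rnorm(k) * 0.01                        ## coefficients close to zero
z <- rnorm(k * 10000)                       ## drives 10000 null replicates
smoothTest(b, X, V, z)$pval                 ## Monte Carlo tail proportion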
pinvXVX <- function(X,V,rank=NULL,type=0) {
## Routine for forming the fractionally truncated
## pseudoinverse of XVX'. Returns D such that
## DD' gives the pseudoinverse itself.
## truncates to numerical rank, if this is
## less than supplied rank+1.
k <- max(0,floor(rank))
nu <- abs(rank - k)
# if (k < 1) { k <- 1; nu <- 0}
if (nu>0) k1 <- k+1 else k1 <- k
## The type argument specifies the type of truncation to use.
## on entry `rank' should be an edf estimate
## 0. Default using the fractionally truncated pinv.
## 1. Round down to k if k<= rank < k+0.05, otherwise up.
## 2. Naive rounding.
## 3. Round up.
## 4. Numerical rank estimation, tol=1e-3
qrx <- qr(X)
R <- qr.R(qrx)
@@ -2513,6 +2539,25 @@ pinvXVX <- function(X,V,rank=NULL) {
V <- (V + t(V))/2
ed <- eigen(V,symmetric=TRUE)
k <- max(0,floor(rank))
nu <- abs(rank - k) ## fractional part of supplied edf
if (type==1) { ## round up if more than .05 above lower
if (rank > k + .05||k==0) k <- k + 1
nu <- 0;rank <- k
} else if (type==2) { ## naive round
nu <- 0;rank <- k <- max(1,round(rank))
warning("p-values may give low power in some circumstances")
} else if (type==3) { ## round up
nu <- 0; rank <- k <- max(1,ceiling(rank))
warning("p-values un-reliable")
} else if (type==4) { ## rank estimation
rank <- k <- max(sum(ed$values>1e-3*max(ed$values)),1)
nu <- 0
warning("p-values may give very low power")
}
if (nu>0) k1 <- k+1 else k1 <- k
## check that actual rank is not below supplied rank+1
r.est <- sum(ed$values > max(ed$values)*.Machine$double.eps^.9)
if (r.est<k1) {k1 <- k <- r.est;nu <- 0;rank <- r.est}
@@ -2543,12 +2588,89 @@ pinvXVX <- function(X,V,rank=NULL) {
}
attr(vec,"rank") <- rank ## actual rank
vec ## vec%*%t(vec) is the pseudoinverse
}
} ## end of pinvXVX
testStat <- function(p,X,V,rank=NULL,type=0) {
## Routine for forming the fractionally truncated
## pseudoinverse of XVX', and returning
## p'X'(XVX)^-Xp.
## Truncates to numerical rank, if this is
## less than supplied rank+1.
## The type argument specifies the type of truncation to use.
## on entry `rank' should be an edf estimate
## 0. Default using the fractionally truncated pinv.
## 1. Round down to k if k<= rank < k+0.05, otherwise up.
## 2. Naive rounding.
## 3. Round up.
## 4. Numerical rank estimation, tol=1e-3
qrx <- qr(X)
R <- qr.R(qrx)
V <- R%*%V[qrx$pivot,qrx$pivot]%*%t(R)
V <- (V + t(V))/2
ed <- eigen(V,symmetric=TRUE)
k <- max(0,floor(rank))
nu <- abs(rank - k) ## fractional part of supplied edf
if (type==1) { ## round up if more than .05 above lower
if (rank > k + .05||k==0) k <- k + 1
nu <- 0;rank <- k
} else if (type==2) { ## naive round
nu <- 0;rank <- k <- max(1,round(rank))
warning("p-values may give low power in some circumstances")
} else if (type==3) { ## round up
nu <- 0; rank <- k <- max(1,ceiling(rank))
warning("p-values un-reliable")
} else if (type==4) { ## rank estimation
rank <- k <- max(sum(ed$values>1e-3*max(ed$values)),1)
nu <- 0
warning("p-values may give very low power")
}
if (nu>0) k1 <- k+1 else k1 <- k
summary.gam <- function (object, dispersion = NULL, freq = FALSE, ...)
## check that actual rank is not below supplied rank+1
r.est <- sum(ed$values > max(ed$values)*.Machine$double.eps^.9)
if (r.est<k1) {k1 <- k <- r.est;nu <- 0;rank <- r.est}
## Get the eigenvectors...
# vec <- qr.qy(qrx,rbind(ed$vectors,matrix(0,nrow(X)-ncol(X),ncol(X))))
vec <- ed$vectors
if (k1<ncol(vec)) vec <- vec[,1:k1,drop=FALSE]
if (k==0) {
vec <- t(t(vec)*sqrt(nu/ed$val[1]))
##attr(vec,"rank") <- rank
##return(vec)
}
## deal with the fractional part of the pinv...
if (nu>0&&k>0) {
if (k>1) vec[,1:(k-1)] <- t(t(vec[,1:(k-1)])/sqrt(ed$val[1:(k-1)]))
b12 <- .5*nu*(1-nu)
if (b12<0) b12 <- 0
b12 <- sqrt(b12)
B <- matrix(c(1,b12,b12,nu),2,2)
ev <- diag(ed$values[k:k1]^-.5)
B <- ev%*%B%*%ev
eb <- eigen(B,symmetric=TRUE)
rB <- eb$vectors%*%diag(sqrt(eb$values))%*%t(eb$vectors)
vec[,k:k1] <- t(rB%*%t(vec[,k:k1]))
} else {
vec <- t(t(vec)/sqrt(ed$val[1:k]))
}
##attr(vec,"rank") <- rank ## actual rank
#d <- t(vec)%*%(X%*%p)
d <- t(vec)%*%(R%*%p)
d <- sum(d^2)
attr(d,"rank") <- rank ## actual rank
##vec ## vec%*%t(vec) is the pseudoinverse
d
} ## end of testStat
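A matching toy call of testStat, again with invented inputs; a fractional rank such as 2.3 exercises the default (type=0) fractionally truncated pseudoinverse branch:

set.seed(2)
n <- 50; q <- 4
X <- matrix(rnorm(n * q), n, q)
V <- crossprod(matrix(rnorm(q * q), q)) / n
p <- rnorm(q)                               ## stand-in coefficient vector
Tp <- testStat(p, X, V, rank = 2.3, type = 0)
c(stat = as.numeric(Tp), rank = attr(Tp, "rank"))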
summary.gam <- function (object, dispersion = NULL, freq = FALSE, p.type=0, ...)
# summary method for gam object - provides approximate p values for terms + other diagnostics
# Improved by Henric Nilsson
{ pinv<-function(V,M,rank.tol=1e-6)
@@ -2630,8 +2752,19 @@ summary.gam <- function (object, dispersion = NULL, freq = FALSE, ...)
} else { pTerms.df<-pTerms.chi.sq<-pTerms.pv<-array(0,0)}
## Now deal with the smooth terms....
m <- length(object$smooth) # number of smooth terms
if (p.type < 0 ) {
kmax <- 0
for (i in 1:m) {
start <- object$smooth[[i]]$first.para
stop <- object$smooth[[i]]$last.para
k <- stop-start+1
if (k>kmax) kmax <- k
}
z <- rnorm(kmax*100000) ## N(0,1) deviates to drive null simulation
}
m<-length(object$smooth) # number of smooth terms
df <- edf1 <- edf <- s.pv <- chi.sq <- array(0, m)
if (m>0) # form test statistics for each smooth
{ if (!freq) {
@@ -2672,21 +2805,32 @@ summary.gam <- function (object, dispersion = NULL, freq = FALSE, ...)
df[i] <- attr(V, "rank")
} else { ## Inverted Nychka interval statistics
Xt <- X[,start:stop,drop=FALSE]
ft <- Xt%*%p
df[i] <- min(ncol(Xt),edf1[i])
D <- pinvXVX(Xt,V,df[i])
df[i] <- attr(D,"rank") ## df[i] ##+alpha*sum(object$smooth[[i]]$sp<0) ## i.e. alpha * (number free sp's)
chi.sq[i] <- sum((t(D)%*%ft)^2)
if (p.type < 0) {
##if (p.type == -2) Xt <- diag(length(p)) ## amazingly poor
res <- smoothTest(p,Xt,V,z)
df[i] <- edf[i] ## not really used
chi.sq[i] <- res$stat
s.pv[i] <- res$pval
} else {
#ft <- Xt%*%p
df[i] <- min(ncol(Xt),edf1[i])
##D <- pinvXVX(Xt,V,df[i],type=p.type)
chi.sq[i] <- Tp <- testStat(p,Xt,V,df[i],type=p.type)
df[i] <- attr(Tp,"rank") ##attr(D,"rank")
## chi.sq[i] <- sum((t(D)%*%ft)^2)
}
}
names(chi.sq)[i]<- object$smooth[[i]]$label
if (!est.disp)
s.pv[i] <- pchisq(chi.sq[i], df = df[i], lower.tail = FALSE)
else
s.pv[i] <- pf(chi.sq[i]/df[i], df1 = df[i], df2 = residual.df, lower.tail = FALSE)
## p-values are meaningless for very small edf. Need to set to NA
if (df[i] < 0.5) s.pv[i] <- NA
if (p.type> -.5||freq) {
if (!est.disp)
s.pv[i] <- pchisq(chi.sq[i], df = df[i], lower.tail = FALSE)
else
s.pv[i] <- pf(chi.sq[i]/df[i], df1 = df[i], df2 = residual.df, lower.tail = FALSE)
## p-values are meaningless for very small edf. Need to set to NA
if (df[i] < 0.5) s.pv[i] <- NA
}
}
if (!est.disp) {
if (freq) {
@@ -2746,7 +2890,7 @@ print.summary.gam <- function(x, digits = max(3, getOption("digits") - 3),
}
anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=FALSE)
anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=FALSE,p.type=0)
# improved by Henric Nilsson
{ # adapted from anova.glm: R stats package
dotargs <- list(...)
@@ -2765,7 +2909,7 @@ anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=FALSE)
test = test))
if (!is.null(test)) warning("test argument ignored")
if (!inherits(object,"gam")) stop("anova.gam called with non gam object")
sg <- summary(object, dispersion = dispersion, freq = freq)
sg <- summary(object, dispersion = dispersion, freq = freq,p.type=p.type)
class(sg) <- "anova.gam"
sg
}
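How the new argument is used from the top level in this version, sketched with an illustrative model (p.type values follow the comments in testStat above; negative p.type switches to the simulation-based smoothTest):

library(mgcv)
set.seed(3)
dat <- gamSim(1, n = 200, scale = 2)
b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML")
summary(b)              ## default p.type = 0: fractional truncation
summary(b, p.type = 1)  ## round edf down only when within .05 of its floor
summary(b, p.type = -1) ## simulation-based smoothTest p-values
anova(b, p.type = 1)    ## anova.gam passes p.type through to summary.gam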
......
@@ -959,7 +959,7 @@ smooth.construct.tp.smooth.spec<-function(object,data,knots)
## deal with possible extra arguments of "tp" type smooth
xtra <- list()
if (is.null(object$xt$max.knots)) xtra$max.knots <- 3000
if (is.null(object$xt$max.knots)) xtra$max.knots <- 2000
else xtra$max.knots <- object$xt$max.knots
if (is.null(object$xt$seed)) xtra$seed <- 1
else xtra$seed <- object$xt$seed
@@ -1010,10 +1010,13 @@ smooth.construct.tp.smooth.spec<-function(object,data,knots)
assign(".Random.seed",seed,envir=.GlobalEnv) ## RNG behaves as if it had not been used
}
} ## end of large data set handling
if (object$bs.dim[1]<0) object$bs.dim <- 10*3^(object$dim-1) # auto-initialize basis dimension
##if (object$bs.dim[1]<0) object$bs.dim <- 10*3^(object$dim-1) # auto-initialize basis dimension
object$p.order[is.na(object$p.order)] <- 0 ## auto-initialize
k<-object$bs.dim
M<-null.space.dimension(object$dim,object$p.order)
def.k <- c(8,27,100) ## default penalty range space dimension for different dimensions
dd <- min(object$dim,length(def.k))
if (object$bs.dim[1]<0) object$bs.dim <- M+def.k[dd] ##10*3^(object$dim-1) # auto-initialize basis dimension
k<-object$bs.dim
if (k<M+1) # essential or construct_tprs will segfault, as tprs_setup does this
{ k<-M+1
object$bs.dim<-k
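The new default basis dimension as standalone arithmetic (helper name invented; null.space.dimension is exported by mgcv, and m = 0 requests the default penalty order, exactly as the code above passes p.order after replacing NAs with 0):

library(mgcv)
default.tp.k <- function(d, m = 0) {
  M <- null.space.dimension(d, m) ## dimension of the unpenalized space
  def.k <- c(8, 27, 100)          ## default range space dimension by d
  M + def.k[min(d, length(def.k))]
}
default.tp.k(1) ## 10, matching the old default 10*3^0
default.tp.k(2) ## 30, matching the old default 10*3^1
default.tp.k(5) ## 100 + M, far below the old 10*3^4 = 810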
@@ -2408,7 +2411,7 @@ smooth.construct.ds.smooth.spec<-function(object,data,knots)
if (object$bs.dim[1]<0) object$bs.dim <- 10*3^(object$dim[1]-1) # auto-initialize basis dimension
## if (object$bs.dim[1]<0) object$bs.dim <- 10*3^(object$dim[1]-1) # auto-initialize basis dimension
## Check the conditions on Duchon's m, s and n (p.order[1], p.order[2] and dim)...
@@ -2451,6 +2454,15 @@ smooth.construct.ds.smooth.spec<-function(object,data,knots)
T <- DuchonT(knt,m=object$p.order[1],n=object$dim) ## constraint matrix
ind <- 1:ncol(T)
def.k <- c(10,30,100)
dd <- min(object$dim,length(def.k))
if (object$bs.dim[1]<0) object$bs.dim <- ncol(T) + def.k[dd] ## default basis dimension
if (object$bs.dim < ncol(T)+1) {
object$bs.dim <- ncol(T)+1
warning("basis dimension reset to minimum possible")
}
k <- object$bs.dim
if (k<nk) {
......
** denotes quite substantial/important changes
*** denotes really big changes
1.7-12
* bam can now compute the leading order QR decomposition on a cluster
set up using the parallel package.
* Default k for "tp" and "ds" modified so that it doesn't exceed 100 + the
null space dimension (to avoid complaints from users smoothing in quite a
lot of dimensions). Also default sub-sample size reduced to 2000.
* Greater use of BLAS routines in the underlying method code. In particular,
all steps in gam fitting contributing to the leading-order operation count
now use BLAS. You'll need R to be using a rather fancy BLAS to see much
difference, however.
* Amusingly, some highly tuned BLAS libraries can result in LAPACK not always
giving identical eigenvalues when called twice with the same matrix. The
`newton' optimizer had assumed this wouldn't happen: not any more.
* Now byte compiled by default. Turn this off in DESCRIPTION if it interferes
with debugging.
* summary.gam p-value computation options modified (default remains the
same).
* summary.gam default p-value computation made more computationally
efficient.
* gamm and bam could fail under some options for specifying binomial models.
Now fixed.
1.7-11
* smoothCon bug fix to avoid NA labels for matrix arguments when
......
@@ -13,7 +13,7 @@ suggests that best p-value performance results from using ML estimated smoothing
}
\usage{
\method{anova}{gam}(object, ..., dispersion = NULL, test = NULL,
freq = FALSE)
freq = FALSE,p.type=0)
\method{print}{anova.gam}(x, digits = max(3, getOption("digits") - 3),...)
}
%- maybe also `usage' for other objects documented here.
@@ -25,6 +25,8 @@ suggests that best p-value performance results from using ML estimated smoothing
\code{"Chisq"}, \code{"F"} or \code{"Cp"}. }
\item{freq}{whether to use frequentist or Bayesian approximations for single smooth term
p-values. See \code{\link{summary.gam}} for details.}
\item{p.type}{selects the exact test statistic to use for single smooth term p-values. See
\code{\link{summary.gam}} for details.}
\item{digits}{number of digits to use when printing output.}
}
\details{ If more than one fitted model is provided then \code{anova.glm} is
......
......@@ -10,14 +10,14 @@ The degree of smoothness of model terms is estimated as part of
fitting. In use the function is much like \code{\link{gam}}, except that the numerical methods
are designed for datasets containing upwards of several tens of thousands of data. The advantage
of \code{bam} is a much lower memory footprint than \code{\link{gam}}, but it can also be much faster,
for large datasets.
for large datasets. \code{bam} can also compute on a cluster set up by the \link[parallel]{parallel} package.
}
\usage{
bam(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,
na.action=na.omit, offset=NULL,method="REML",control=list(),
scale=0,gamma=1,knots=NULL,sp=NULL,min.sp=NULL,paraPen=NULL,
chunk.size=10000,rho=0,sparse=FALSE,...)
chunk.size=10000,rho=0,sparse=FALSE,cluster=NULL,gc.level=1,...)
}
%- maybe also `usage' for other objects documented here.
@@ -110,6 +110,12 @@ if smooths share smoothing parameters).}
then in principle computation could be made faster using sparse matrix methods, and you could set this to \code{TRUE}.
In practice the speed up is disappointing, and the computation is less well conditioned than the default. See details.}
\item{cluster}{\code{bam} can compute the computationally dominant QR decomposition in parallel using \link[parallel]{parLapply}
from the \code{parallel} package, if it is supplied with a cluster on which to do this (a cluster here can be some cores of a
single machine). See details and example code.
}
\item{gc.level}{to keep the memory footprint down, it helps to call the garbage collector often, but this takes
a substantial amount of time. Setting this to zero means that garbage collection only happens when R decides it should. Setting it to 2 gives frequent garbage collection; 1 is in between.}
\item{...}{further arguments for
passing on e.g. to \code{gam.fit} (such as \code{mustart}). }
@@ -133,10 +139,11 @@ for REML/ML is justified via the asymptotic multivariate normality of Q'z where
Note that POI is not as stable as the default nested iteration used with \code{\link{gam}}, but that for very large, information rich,
datasets, this is unlikely to matter much.
Note also that it is possible to spend most of the computational time on basis evaluation, if an expensive basis is used. In practice
this means that the default \code{"tp"} basis should be avoided: almost any other basis (e.g. \code{"cr"} or \code{"ps"})
Note also that it is possible to spend most of the computational time on basis evaluation, if an expensive basis is used. In practice this means that the default \code{"tp"} basis should be avoided: almost any other basis (e.g. \code{"cr"} or \code{"ps"})
can be used in the 1D case, and tensor product smooths (\code{te}) are typically much less costly in the multi-dimensional case.
If \code{cluster} is provided as a cluster set up using \code{\link[parallel]{makeCluster}} (or \code{\link[parallel]{makeForkCluster}}) from the \code{parallel} package, then the rate limiting QR decomposition of the model matrix is performed in parallel using this cluster. Note that the speed ups are often not that great. On a multi-core machine it is usually best to set the cluster size to the number of physical cores, which is often less than what is reported by \code{\link[parallel]{detectCores}}. Using more than the number of physical cores can result in no speed up at all (or even a slow down). Note that a highly parallel BLAS may negate all advantage from using a cluster of cores. Computing in parallel of course requires more memory than computing in series. See examples.
If the argument \code{sparse=TRUE} then QR updating is replaced by an alternative scheme, in which the model matrix is stored whole
as a sparse matrix. This only makes sense if all smooths are P-splines and all tensor products are of the
form \code{te(...,bs="ps",np=FALSE)}, but no check is made. The computations are then based on the Choleski decomposition of
@@ -151,10 +158,9 @@ by this approach, while the computation is less stable than the default, and the
\references{
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
@@ -201,15 +207,34 @@ summary(ba)
## A Poisson example...
dat <- gamSim(1,n=15000,dist="poisson",scale=.1)
b1 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson())
system.time(b1 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson()))
b1
## repeat on a cluster
require(parallel)
nc <- 2 ## cluster size, set for example portability
if (detectCores()>1) { ## no point otherwise
cl <- makeCluster(nc)
## could also use makeForkCluster, but read warnings first!
} else cl <- NULL
system.time(b2 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson(),cluster=cl))
## ... first call has startup overheads, repeat shows speed up...
system.time(b2 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson(),cluster=cl))
if (!is.null(cl)) stopCluster(cl)
b2
## Sparse smoothers example...
b2 <- bam(y ~ te(x0,x1,bs="ps",k=10,np=FALSE)+s(x2,bs="ps",k=30)+
b3 <- bam(y ~ te(x0,x1,bs="ps",k=10,np=FALSE)+s(x2,bs="ps",k=30)+
s(x3,bs="ps",k=30),data=dat,method="ML",
family=poisson(),sparse=TRUE)
b2
b3
}
......
@@ -352,28 +352,45 @@ an infinite range on the linear predictor scale.
library(mgcv)
set.seed(0) ## simulate some data...
dat <- gamSim(1,n=400,dist="normal",scale=2)