Commit b5dda8c1 authored by Dirk Eddelbuettel

Import Upstream version 1.7-18

parent d9e24579
Package: mgcv
Version: 1.7-17
Version: 1.7-18
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with GCV/AIC/REML smoothness
......@@ -14,6 +14,6 @@ Suggests: nlme (>= 3.1-64), splines, Matrix, parallel
LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
Packaged: 2012-05-23 08:29:33 UTC; sw283
Packaged: 2012-06-12 08:47:36 UTC; sw283
Repository: CRAN
Date/Publication: 2012-05-23 09:44:27
Date/Publication: 2012-06-12 11:02:44
09cc0a6268c851dbf582b6b0c49c3cda *DESCRIPTION
d2d8a4e916f2f1161c5a07902e09fff2 *DESCRIPTION
50152051f123a389d421aa3130dce252 *NAMESPACE
ccbfea150b6066c79b431a94f410f00c *R/bam.r
a868f81bfdea5adc552233d1f77c0bbe *R/fast-REML.r
3af17533752a84864d46354712372ea5 *R/bam.r
5a15ef36318076ec076a7aef2a1bd251 *R/fast-REML.r
b160632e8f38fa99470e2f8cba82a495 *R/gam.fit3.r
902657a0ee2dedc3fdfa501bf3b37c5b *R/gam.sim.r
ad57e83090b4633ee50041fd3571c016 *R/gamm.r
0462c2304ecf6ac6392622347b612ccf *R/mgcv.r
5c5f68e76697c356b95e084bee5d7776 *R/plots.r
7aa8babce7db388dd66d75d72e3007d2 *R/mgcv.r
958a9a13b359c6e6f23f340a05d98899 *R/plots.r
b662840dd06dc05bca6930ce0864d3b4 *R/smooth.r
fb66d6c18398411a99ffcb788b854f13 *R/sparse.r
366a2457115436852dff53237d746452 *changeLog
a5d19ababeb66e23a576ebdb17b14e2d *changeLog
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
40874e3ced720a596750f499ded8a60a *data/columb.rda
88d77139cc983317b6acd8c5f1252ab9 *gnugpl2.txt
......@@ -18,8 +18,8 @@ f693920e12f8a6f2b6cab93648628150 *index
5bf12ddc0dab9daae72271b96a15c539 *man/Predict.matrix.Rd
c51c9b8c9c73f81895176ded39b91394 *man/Predict.matrix.cr.smooth.Rd
612ab6354541ebe38a242634d73b66ba *man/Tweedie.Rd
085090ec00015c13044857b56d7440dc *man/anova.gam.Rd
fa6e8f98dc01de508e95d1008ae84d60 *man/bam.Rd
68cba9c7ec24688d2df4bb5a29b22564 *man/anova.gam.Rd
585d6b6a2e7816df83d5f9193195a233 *man/bam.Rd
b385d6d5419d0d6aefe03af1a79d5c4e *man/bam.update.Rd
4e925cb579f4693d1b8ec2d5092c0b37 *man/cSplineDes.Rd
9b4d616d1b6c4a46ca77d16cded3f806 *man/choose.k.Rd
......@@ -33,12 +33,12 @@ f764fb7cb9e63ff341a0075a3854ab5d *man/exclude.too.far.Rd
bb099e6320a6c1bd79fe4bf59e0fde08 *man/formula.gam.Rd
6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd
d7781fc09ad742a63746ea92a927eee4 *man/gam.Rd
eb8477a38223fb3d5f94f98e6958ed83 *man/gam.check.Rd
fe61dd0efab9e920c17335faf3d5764c *man/gam.check.Rd
96c9417e4ac5d79ec9ed3f363adfc4e9 *man/gam.control.Rd
fd98327327ba74bb1a61a6519f12e936 *man/gam.convergence.Rd
58ab3b3d6f4fd0d008d73c3c4e6d3305 *man/gam.fit.Rd
21339a5d1eb8c83679dd9022ab682b5e *man/gam.fit3.Rd
dd35a8a851460c2d2106c03d544c8241 *man/gam.models.Rd
8c4805c4afe742aa81ba96ee21a854a2 *man/gam.models.Rd
e969287d1a5c281faa7eb6cfce31a7c5 *man/gam.outer.Rd
96676186808802344a99f9d3170bf775 *man/gam.selection.Rd
76651917bd61fc6bc447bbb40b887236 *man/gam.side.Rd
......@@ -63,7 +63,7 @@ d564d1c5b2f780844ff10125348f2e2c *man/mgcv-FAQ.Rd
18a9858b6f3ffde288b0bf9e1a5da2f6 *man/model.matrix.gam.Rd
bc9b89db7e7ff246749551c16f5f1f07 *man/mono.con.Rd
3a4090ac778273861d97077681a55df2 *man/mroot.Rd
8aea04d0764d195409da798b33516051 *man/negbin.Rd
f0363e3309cca00db840ad0fb8359c0f *man/negbin.Rd
41de8762baab4fc0cf1224df168520fe *man/new.name.Rd
dffa2d51c704c610088fa02d7220b05e *man/notExp.Rd
150d7f8a427117353c5c2e466ff0bfae *man/notExp2.Rd
......@@ -78,7 +78,7 @@ de454d1dc268bda008ff46639a89acec *man/place.knots.Rd
afca36f5b1a5d06a7fcab2eaaa029e7e *man/predict.bam.Rd
df63d7045f83a1dc4874fcac18a2303c *man/predict.gam.Rd
a594eb641cae6ba0b83d094acf4a4f81 *man/print.gam.Rd
5311a1e83ae93aef5f9ae38f7492536a *man/qq.gam.Rd
b8ac3ed8fc05bb14c605b996404853cd *man/qq.gam.Rd
f77ca1471881d2f93c74864d076c0a0e *man/rTweedie.Rd
354798b41bf766baaa4d0bad85274c25 *man/random.effects.Rd
37669f97e17507f3ae2d6d1d74feb9d7 *man/residuals.gam.Rd
......@@ -101,7 +101,7 @@ d202c6718fb1138fdd99e6102250aedf *man/smooth.construct.re.smooth.spec.Rd
5ae47a140393009e3dba7557af175170 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
700699103b50f40d17d3824e35522c85 *man/step.gam.Rd
05aa756707171bf7581bc8344d0f148f *man/summary.gam.Rd
01a16d9badcb601166998d0893f75cfc *man/summary.gam.Rd
7f383eaaca246c8bf2d5b74d841f7f8a *man/t2.Rd
04076444b2c99e9287c080298f9dc1d7 *man/te.Rd
c3c23641875a293593fe4ef032b44aae *man/tensor.prod.model.matrix.Rd
......
......@@ -364,18 +364,22 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =nobs+nobs.extra,Mp=um$Mp)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=FALSE)
object <- list(coefficients=res$beta,full.sp = exp(fit$rho.full),
object <- list(coefficients=res$beta,
gcv.ubre=fit$reml,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=NULL,scale.estimated = scale<=0,outer.info=fit$outer.info,
optimizer=c("perf","newton"))
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
nsp <- length(fit$rho.full)
object$full.sp <- exp(fit$rho.full[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
object$full.sp <- exp(fit$rho.full)
}
class(object)<-c("gam")
} else { ## method is one of "ML", "P-REML" etc...
......@@ -911,7 +915,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =n,Mp=um$Mp)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale)
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,full.sp = exp(fit$rho.full),
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,
gcv.ubre=fit$reml,hat=res$hat,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=res$Ve,Vp=res$Vp,scale.estimated = scale<=0,outer.info=fit$outer.info,
......@@ -919,10 +923,13 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
object$sp <- exp(fit$rho[-nsp])
nsp <- length(fit$rho.full)
object$full.sp <- exp(fit$rho.full[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
object$full.sp <- exp(fit$rho.full)
}
if (rho!=0) { ## correct RE/ML score for AR1 transform
......@@ -1227,6 +1234,10 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
object$aic <- family$aic(object$y,1,object$fitted.values,object$weights,object$deviance) +
2*sum(object$edf)
object$null.deviance <- sum(family$dev.resids(object$y,mean(object$y),object$weights))
if (!is.null(object$full.sp)) {
if (length(object$full.sp)==length(object$sp)&&
all.equal(object$sp,object$full.sp)==TRUE) object$full.sp <- NULL
}
object
} ## end of bam
......@@ -1326,7 +1337,7 @@ bam.update <- function(b,data,chunk.size=10000) {
res <- Sl.postproc(b$Sl,fit,um$undrop,b$qrx$R,cov=TRUE,scale=scale)
object <- list(coefficients=res$beta,edf=res$edf,full.sp = exp(fit$rho.full),
object <- list(coefficients=res$beta,edf=res$edf,
gcv.ubre=fit$reml,hat=res$hat,outer.info=list(iter=fit$iter,
message=fit$conv),optimizer="fast-REML",rank=ncol(um$X),
Ve=NULL,Vp=res$V,scale.estimated = scale<=0)
......@@ -1334,9 +1345,12 @@ bam.update <- function(b,data,chunk.size=10000) {
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
nsp <- length(fit$rho.full)
object$full.sp <- exp(fit$rho.full[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
object$full.sp <- exp(fit$rho.full)
}
if (b$AR1.rho!=0) { ## correct RE/ML score for AR1 transform
......@@ -1399,6 +1413,7 @@ bam.update <- function(b,data,chunk.size=10000) {
b$gcv.ubre <- b$gcv.ubre - (n-1)*log(ld)
}
}
b$G$X <- NULL
b$linear.predictors <- as.numeric(predict.gam(b,newdata=b$model,block.size=chunk.size))
b$fitted.values <- b$linear.predictors ## strictly additive only!
......
## code for fast REML computation. key feature is that first and
## second derivatives come at no increase in leading order
## computational cost, realtive to evaluation!
## computational cost, relative to evaluation!
## (c) Simon N. Wood, 2010-2012
Sl.setup <- function(G) {
......@@ -91,7 +91,7 @@ Sl.setup <- function(G) {
Sl[[b]]$S <- G$smooth[[i]]$S
nb <- nrow(Sl[[b]]$S[[1]])
sbStart <- sbStop <- rep(NA,m)
## overlap testing requires that block ranges
## overlap testing requires the block ranges
for (j in 1:m) { ## get block range for each S[[j]]
ir <- range((1:nb)[rowSums(abs(Sl[[b]]$S[[j]]))>0])
ic <- range((1:nb)[colSums(abs(Sl[[b]]$S[[j]]))>0])
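For orientation, the range computation in this hunk just locates the contiguous span of coefficients that a penalty block actually touches. A standalone toy sketch (the matrix `S` below is invented for illustration):

```r
## Toy illustration of the block-range idea: find the contiguous
## index range over which a penalty matrix has non-zero rows/columns.
S <- matrix(0, 6, 6)
S[3:5, 3:5] <- diag(3)                         ## penalty affects coefs 3..5 only
ir <- range((1:nrow(S))[rowSums(abs(S)) > 0])  ## row range: c(3, 5)
ic <- range((1:ncol(S))[colSums(abs(S)) > 0])  ## col range: c(3, 5)
```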
......@@ -209,8 +209,10 @@ Sl.initial.repara <- function(Sl,X,inverse=FALSE) {
if (is.matrix(Sl[[b]]$D)) {
X[ind,] <- Sl[[b]]$D%*%X[ind,,drop=FALSE]
X[,ind] <- X[,ind,drop=FALSE]%*%t(Sl[[b]]$D)
} else
X[ind,ind] <- Sl[[b]]$D*t(Sl[[b]]$D*t(X[ind,ind,drop=FALSE]))
} else {
X[,ind] <- t(Sl[[b]]$D * t(X[,ind,drop=FALSE]))
X[ind,] <- Sl[[b]]$D * X[ind,,drop=FALSE]
}
}
} else { ## it's a parameter vector
for (b in 1:length(Sl)) {
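The corrected indexing can be checked in isolation. A toy sketch (all values invented, not mgcv internals) showing that when the reparameterization `D` is stored as a vector it must scale the whole rows and columns indexed by `ind`, and that this matches the explicit diagonal-matrix form:

```r
## D stored as a vector of diagonal entries for coefficients `ind`
X <- matrix(1, 4, 4); D <- c(2, 3); ind <- 2:3
Xd <- X
Xd[, ind] <- t(D * t(Xd[, ind, drop = FALSE]))  ## scale columns in ind
Xd[ind, ] <- D * Xd[ind, , drop = FALSE]        ## scale rows in ind
## equivalent to the explicit matrix form Dm X Dm'...
Dm <- diag(4); diag(Dm)[ind] <- D
all.equal(Xd, Dm %*% X %*% t(Dm))               ## TRUE
```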
......@@ -642,7 +644,7 @@ fast.REML.fit <- function(Sl,X,y,rho,L=NULL,rho.0=NULL,log.phi=0,phi.fixed=TRUE,
best$iter <- iter
best$outer.info <- list(conv = best$conv, iter = best$iter,grad = grad,hess = hess)
best$rho <- rho
best$rho.full <- L%*%rho+rho.0
best$rho.full <- as.numeric(L%*%rho+rho.0)
best ## return the best fit (note that it will need post-processing to be useable)
} ## end fast.REML.fit
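The `as.numeric()` wrapper matters because `%*%` returns a one-column matrix, while downstream code (e.g. `exp(fit$rho.full[-nsp])` in the bam hunks above) expects a plain numeric vector. A minimal sketch with invented values:

```r
L <- diag(2); rho <- c(0.5, 1); rho.0 <- c(0, 0)
x <- L %*% rho + rho.0
is.matrix(x)              ## TRUE: a 2 x 1 matrix, not a vector
is.matrix(as.numeric(x))  ## FALSE: dimensions dropped
```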
......
......@@ -2091,30 +2091,31 @@ predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
# get data from which to predict.....
nd.is.mf <- FALSE # need to flag if supplied newdata is already a model frame
if (newdata.guaranteed==FALSE)
{ if (missing(newdata)) # then "fake" an object suitable for prediction
{ newdata<-object$model
{ if (missing(newdata)) { # then "fake" an object suitable for prediction
newdata <- object$model
new.data.ok <- FALSE
nd.is.mf <- TRUE
}
else # do an R ``standard'' evaluation to pick up data
{ new.data.ok <- TRUE
if (is.data.frame(newdata)&&!is.null(attr(newdata,"terms"))) # it's a model frame
{ if (sum(!(names(object$model)%in%names(newdata)))) stop(
} else { # do an R ``standard'' evaluation to pick up data
new.data.ok <- TRUE
if (is.data.frame(newdata)&&!is.null(attr(newdata,"terms"))) { # it's a model frame
if (sum(!(names(object$model)%in%names(newdata)))) stop(
"newdata is a model.frame: it should contain all required variables\n")
nd.is.mf <- TRUE
} else
{ ## Following is non-standard to allow convenient splitting into blocks
} else {
## Following is non-standard to allow convenient splitting into blocks
## below, and to allow checking that all variables are in newdata ...
## get names of required variables, less response, but including offset variable
Terms <- delete.response(terms(object))
allNames <- all.vars(Terms)
ff <- reformulate(allNames)
if (sum(!(allNames%in%names(newdata)))) {
warning("not all required variables have been supplied in newdata!\n")}
## note that `xlev' argument not used here, otherwise `as.factor' in
## formula can cause a problem ... levels reset later.
newdata <- eval(model.frame(ff,data=newdata,na.action=na.act),parent.frame())
if (length(allNames) > 0) {
ff <- reformulate(allNames)
if (sum(!(allNames%in%names(newdata)))) {
warning("not all required variables have been supplied in newdata!\n")}
## note that `xlev' argument not used here, otherwise `as.factor' in
## formula can cause a problem ... levels reset later.
newdata <- eval(model.frame(ff,data=newdata,na.action=na.act),parent.frame())
} ## otherwise it's intercept only and newdata can be left alone
na.act <- attr(newdata,"na.action")
}
}
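Per the changeLog entry below, this guard around `reformulate` fixes prediction from intercept-only models, where `allNames` is empty. A minimal sketch of the case now handled (toy data):

```r
library(mgcv)
set.seed(1)
d <- data.frame(y = rnorm(10))
b <- gam(y ~ 1, data = d)
## newdata need not contain any model variables here (there are none
## beyond the intercept); previously this call could fail:
predict(b, newdata = data.frame(x = 1:3))
```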
......@@ -2616,7 +2617,7 @@ if (rank<1) rank <- 1 ## EXPERIMENTAL
summary.gam <- function (object, dispersion = NULL, freq = TRUE, p.type=0, ...) {
summary.gam <- function (object, dispersion = NULL, freq = FALSE, p.type=0, ...) {
## summary method for gam object - provides approximate p values
## for terms + other diagnostics
## Improved by Henric Nilsson
......@@ -2838,8 +2839,10 @@ summary.gam <- function (object, dispersion = NULL, freq = TRUE, p.type=0, ...)
}
}
w <- object$prior.weights
mean.y <- sum(w*object$y)/sum(w)
w <- sqrt(w)
nobs <- nrow(object$model)
r.sq<- 1 - var(w*(object$y-object$fitted.values))*(nobs-1)/(var(w*object$y)*residual.df)
r.sq<- 1 - var(w*(object$y-object$fitted.values))*(nobs-1)/(var(w*(object$y-mean.y))*residual.df)
dev.expl<-(object$null.deviance-object$deviance)/object$null.deviance
ret<-list(p.coeff=p.coeff,se=se,p.t=p.t,p.pv=p.pv,residual.df=residual.df,m=m,chi.sq=chi.sq,
s.pv=s.pv,scale=dispersion,r.sq=r.sq,family=object$family,formula=object$formula,n=nobs,
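The corrected denominator centres the response at its prior-weight weighted mean before taking the variance, rather than using the raw response. A standalone sketch of the fixed formula with invented inputs:

```r
## Hypothetical response, fitted values and prior weights:
set.seed(2)
y <- rnorm(50); fitted <- y + rnorm(50, sd = 0.5)
w <- runif(50, 0.5, 2)              ## prior weights
residual.df <- 45; nobs <- 50
mean.y <- sum(w * y) / sum(w)       ## weighted mean of the response
ws <- sqrt(w)
r.sq <- 1 - var(ws * (y - fitted)) * (nobs - 1) /
            (var(ws * (y - mean.y)) * residual.df)
```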
......@@ -2877,7 +2880,7 @@ print.summary.gam <- function(x, digits = max(3, getOption("digits") - 3),
}
anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=TRUE,p.type=0)
anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=FALSE,p.type=0)
# improved by Henric Nilsson
{ # adapted from anova.glm: R stats package
dotargs <- list(...)
......@@ -3029,7 +3032,7 @@ gam.vcomp <- function(x,rescale=TRUE,conf.lev=.95) {
}
## If a Hessian exists, get CI's for variance components...
if (x$method%in%c("ML","P-ML","REML","P-REML")&&!is.null(x$outer.info$hess)) {
if (x$method%in%c("ML","P-ML","REML","P-REML","fREML")&&!is.null(x$outer.info$hess)) {
H <- x$outer.info$hess ## the hessian w.r.t. log sps and log scale
if (ncol(H)>length(x$sp)) scale.est <- TRUE else scale.est <- FALSE
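With `"fREML"` now accepted here, confidence intervals for variance components become available for `bam` fits as well. A usage sketch on simulated data:

```r
library(mgcv)
dat <- gamSim(1, n = 2000, dist = "normal", scale = 2)
b <- bam(y ~ s(x0) + s(x1), data = dat, method = "fREML")
gam.vcomp(b)  ## variance components, now with confidence intervals
```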
......
......@@ -1011,8 +1011,8 @@ plot.gam <- function(x,residuals=FALSE,rug=TRUE,se=TRUE,pages=0,select=NULL,scal
if (se1.mult<0) se1.mult<-0;if (se2.mult < 0) se2.mult <- 0
} else se1.mult <- se2.mult <-1
if (se && x$Vp[1,1]<=0) ## check that variances are actually available
{ se<-FALSE
if (se && x$Vp[1,1] < 0) ## check that variances are actually available
{ se <- FALSE
warning("No variance estimates available")
}
......
** denotes quite substantial/important changes
*** denotes really big changes
1.7-18
* Embarrassingly, the adjusted r^2 computation in summary.gam was wrong
for models with prior weights. Now fixed, thanks to Antony Unwin.
* bam(...,method="fREML") could give incorrect edfs for "re" terms as a
result of a matrix indexing error in Sl.initial.repara. Now fixed.
Thanks to Martijn Wieling for reporting this.
* summary.gam had freq=TRUE set as default in 1.7-17. This gave better
p-values for paraPen terms, but spoiled p-values for fixed effects in the
presence of "re" terms (a rather more common setup). Default now reset to
freq=FALSE.
* bam(...,method="fREML") made fully compatible with gam.vcomp.
* bam and negbin examples speeded up
* predict.gam could fail for models of the form y~1 when newdata are supplied.
(Could make some model averaging methods fail). Fixed.
* plot.gam had an overzealous check for availability of variance estimates,
  which could make rank deficient models fail to plot CIs. Fixed.
1.7-17
** p-values for terms with no un-penalized components were poor. The theory on
......@@ -111,7 +135,7 @@
made explicit.
* "re" smooths are no longer subject to side constraint under nesting (since
this is almost alway un-necessary and undesirable, and often unexpected).
this is almost always un-necessary and undesirable, and often unexpected).
* side.con modified to allow smooths to be excluded and to allow side
constraint computation to take account of penalties (unused at present).
......
......@@ -12,7 +12,7 @@ to zero. See details.
}
\usage{
\method{anova}{gam}(object, ..., dispersion = NULL, test = NULL,
freq = TRUE,p.type=0)
freq = FALSE,p.type=0)
\method{print}{anova.gam}(x, digits = max(3, getOption("digits") - 3),...)
}
%- maybe also `usage' for other objects documented here.
......@@ -34,8 +34,8 @@ in effective degrees of freedom. The p-values resulting from this are only appro
and must be used with care. The approximation is most accurate when the comparison
relates to unpenalized terms, or smoothers with a null space of dimension greater than zero.
(Basically we require that the difference terms could be well approximated by unpenalized
terms with degrees of freedom approximately the effective degrees of freedom). In simualtions the
p-values are usually slightly to low. For terms with a zero-dimensional null space
terms with degrees of freedom approximately the effective degrees of freedom). In simulations the
p-values are usually slightly too low. For terms with a zero-dimensional null space
(i.e. those which can be penalized to zero) the approximation is often very poor, and significance
can be greatly overstated: i.e. p-values are often substantially too low. This case applies to random effect terms.
......@@ -47,6 +47,13 @@ provided here are better justified than in the multi model case, and have close
correct distribution under the null for smooths with a non-zero dimensional null space (i.e. terms that cannot be penalized to zero). ML or REML smoothing parameter selection leads to the best results in simulations as they tend to avoid occasional severe undersmoothing. In the single model case \code{print.anova.gam} is used as the
printing method.
By default the p-values for parametric model terms are also based on Wald tests using the Bayesian
covariance matrix for the coefficients. This is appropriate when there are "re" terms present, and is
otherwise rather similar to the results using the frequentist covariance matrix (\code{freq=TRUE}), since
the parametric terms themselves are usually unpenalized. Default P-values for parametric terms that are
penalized using the \code{paraPen} argument will not be good. However if such terms represent conventional
random effects with full rank penalties, then setting \code{freq=TRUE} is appropriate.
}
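A self-contained sketch of the distinction (toy random effect supplied via `paraPen`; data invented):

```r
library(mgcv)
set.seed(3)
## 20-level grouping factor acting as an i.i.d. Gaussian random effect
n <- 400; fac <- factor(sample(20, n, replace = TRUE))
y  <- rnorm(20)[fac] + rnorm(n)
Xf <- model.matrix(~ fac - 1)                 ## random effect design matrix
b  <- gam(y ~ Xf, paraPen = list(Xf = list(diag(20))))
anova(b)              ## default: Bayesian covariance matrix (freq = FALSE)
anova(b, freq = TRUE) ## frequentist; better for full rank paraPen penalties
```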
\value{In the multi-model case \code{anova.gam} produces output identical to
......@@ -65,6 +72,9 @@ improvements by Henric Nilsson.}
\section{WARNING}{ If models 'a' and 'b' differ only in terms with no un-penalized components then
p-values from anova(a,b) are unreliable, and usually much too low.
Default P-values will usually be wrong for parametric terms penalized using `paraPen': use freq=TRUE
to obtain better p-values when the penalties are full rank and represent conventional random effects.
}
\seealso{ \code{\link{gam}}, \code{\link{predict.gam}},
......
......@@ -200,9 +200,9 @@ The negbin family is only supported for the *known theta* case.
\examples{
library(mgcv)
## Some moderately large examples...
## Some not very large examples...
dat <- gamSim(1,n=100000,dist="normal",scale=20)
dat <- gamSim(1,n=40000,dist="normal",scale=20)
bs <- "cr";k <- 20
b <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat)
......@@ -216,12 +216,13 @@ summary(ba)
## A Poisson example...
dat <- gamSim(1,n=35000,dist="poisson",scale=.1)
k <- 15
dat <- gamSim(1,n=15000,dist="poisson",scale=.1)
system.time(b1 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson()))
s(x3,bs=bs,k=k),data=dat,family=poisson()))
b1
## repeat on a cluster
## repeat on a cluster (need much larger n to be worthwhile!)
require(parallel)
nc <- 2 ## cluster size, set for example portability
if (detectCores()>1) { ## no point otherwise
......@@ -230,12 +231,12 @@ if (detectCores()>1) { ## no point otherwise
} else cl <- NULL
system.time(b2 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson(),cluster=cl))
s(x3,bs=bs,k=k),data=dat,family=poisson(),cluster=cl))
## ... first call has startup overheads, repeat shows speed up...
system.time(b2 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson(),cluster=cl))
s(x3,bs=bs,k=k),data=dat,family=poisson(),cluster=cl))
fv <- predict(b2,cluster=cl) ## parallel prediction
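The excerpt ends at the parallel prediction; a complete script would normally release the cluster afterwards, e.g.:

```r
if (!is.null(cl)) stopCluster(cl)  ## release the worker processes
```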
......
......@@ -54,8 +54,8 @@ to be useful in this case.
\references{
N.H. Augustin, E-A Sauleaub, S.N. Wood (2012) On quantile quantile plots for generalized linear models
Computational Statistics & Data Analysis.
N.H. Augustin, E-A Sauleaub, S.N. Wood (2012) On quantile quantile plots for generalized linear models.
Computational Statistics & Data Analysis. 56(8), 2404-2409.
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
......
......@@ -230,6 +230,10 @@ inverse of the penalty matrix multiplied by the estimated scale parameter and di
smoothing parameter for the penalty. For example, if you use an identity matrix to penalize some coefficients that are to be viewed as i.i.d.
Gaussian random effects, then their estimated variance will be the estimated scale parameter divided by the estimate of the
smoothing parameter, for this penalty. See the `rail' example below.
P-values for penalized parametric terms should be treated with caution. If you must have them, then use the option \code{freq=TRUE} in
\code{\link{anova.gam}} and \code{\link{summary.gam}}, which will tend to give reasonable results for random effects implemented this way,
but not for terms with a rank deficient penalty (or penalties with a wide eigen-spectrum).
}
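The scale/smoothing-parameter relationship described above can be checked by hand. A toy sketch using the equivalent `s(,bs="re")` construction (invented data):

```r
library(mgcv)
set.seed(4)
n <- 400; fac <- factor(sample(20, n, replace = TRUE))
dat <- data.frame(y = rnorm(20)[fac] + rnorm(n), fac = fac)
b <- gam(y ~ s(fac, bs = "re"), data = dat, method = "REML")
b$sig2 / b$sp[1]  ## implied random effect variance: scale / smoothing parameter
gam.vcomp(b)      ## the same quantity, on the standard deviation scale
```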
......
......@@ -112,7 +112,10 @@ b1 <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=negbin(c(1,10)),
plot(b1,pages=1)
print(b1)
## unknown theta via outer iteration and AIC search...
## unknown theta via outer iteration and AIC search
## (quite slow, which is why it's commented out for
## checking)...
\dontrun{
b2<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=negbin(c(1,10)),
data=dat)
plot(b2,pages=1)
......@@ -124,9 +127,9 @@ b2a <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=negbin(c(1,10)),
plot(b2a,pages=1)
print(b2a)
## how to retrieve Theta...
b2a$family$getTheta()
}
## unknown theta via outer iteration and AIC search
## over a discrete set of values...
......
......@@ -54,7 +54,7 @@ Binomial models other than binary are ok.
\references{
N.H. Augustin, E-A Sauleaub, S.N. Wood (2012) On quantile quantile plots for generalized linear models
Computational Statistics & Data Analysis.
Computational Statistics & Data Analysis. 56(8), 2404-2409.
M.G. Ben and V.J. Yohai (2004) JCGS 13(1), 36-47.
......
......@@ -7,7 +7,7 @@
summaries from it. (See \code{\link{sink}} to divert output to a file.)
}
\usage{
\method{summary}{gam}(object, dispersion=NULL, freq=TRUE, p.type = 0, ...)
\method{summary}{gam}(object, dispersion=NULL, freq=FALSE, p.type = 0, ...)
\method{print}{summary.gam}(x,digits = max(3, getOption("digits") - 3),
signif.stars = getOption("show.signif.stars"),...)
......@@ -21,9 +21,9 @@ summaries from it. (See \code{\link{sink}} to divert output to a file.)
\item{dispersion}{A known dispersion parameter. \code{NULL} to use estimate or
default (e.g. 1 for Poisson).}
\item{freq}{By default p-values for parametric terms are calculated using the frequentist estimated
covariance matrix of the parameter estimators. If this is set to \code{FALSE} then
the Bayesian covariance matrix of the parameters is used instead. }
\item{freq}{By default p-values for parametric terms are calculated using the Bayesian estimated
covariance matrix of the parameter estimators. If this is set to \code{TRUE} then
the frequentist covariance matrix of the parameters is used instead. }
\item{p.type}{determines how p-values are computed for smooth terms. 0 uses a test statistic with
distribution determined by the un-rounded edf of the term. 1 uses upwardly biased rounding of the edf and -1
......@@ -94,6 +94,12 @@ which case it will have been estimated) the statistic is compared
to an F distribution with k upper d.f. and lower d.f. given by the residual degrees of freedom for the model.
Typically the p-values will be somewhat too low.
By default the p-values for parametric model terms are also based on Wald tests using the Bayesian
covariance matrix for the coefficients. This is appropriate when there are "re" terms present, and is
otherwise rather similar to the results using the frequentist covariance matrix (\code{freq=TRUE}), since
the parametric terms themselves are usually unpenalized. Default P-values for parametric terms that are
penalized using the \code{paraPen} argument will not be good. However if such terms represent conventional
random effects with full rank penalties, then setting \code{freq=TRUE} is appropriate.
}
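A self-contained sketch of the new default versus `freq=TRUE` (invented data, with an "re" term present so the Bayesian covariance matrix is the appropriate choice for the fixed effects):

```r
library(mgcv)
set.seed(5)
n   <- 400
fac <- factor(sample(3, n, replace = TRUE))   ## fixed effect factor
id  <- factor(sample(30, n, replace = TRUE))  ## random effect factor
x0  <- runif(n)
y   <- c(0, 1, 2)[fac] + rnorm(30)[id] + sin(2 * pi * x0) + rnorm(n)
dat <- data.frame(y, fac, id, x0)
b <- gam(y ~ fac + s(id, bs = "re") + s(x0), data = dat, method = "REML")
summary(b)               ## Bayesian covariance for parametric terms (new default)
summary(b, freq = TRUE)  ## previous default: frequentist covariance matrix
```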
......@@ -186,7 +192,9 @@ and Hall/CRC Press.
\author{ Simon N. Wood \email{simon.wood@r-project.org} with substantial
improvements by Henric Nilsson.}
\section{WARNING }{ The p-values are approximate (especially for terms that can be peanlized to zero): do read the details section.
\section{WARNING }{ The p-values are approximate (especially for terms that can be penalized to zero): do read the details section.
P-values for terms penalized via `paraPen' will not be correct unless `freq=TRUE' (and maybe not even then).
}
\seealso{ \code{\link{gam}}, \code{\link{predict.gam}},
......