Commit d9e24579 authored by Dirk Eddelbuettel

Import Upstream version 1.7-17

parent b955da99
Package: mgcv
Version: 1.7-16
Version: 1.7-17
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with GCV/AIC/REML smoothness
......@@ -14,6 +14,6 @@ Suggests: nlme (>= 3.1-64), splines, Matrix, parallel
LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
Packaged: 2012-04-30 07:09:06 UTC; sw283
Packaged: 2012-05-23 08:29:33 UTC; sw283
Repository: CRAN
Date/Publication: 2012-04-30 08:22:16
Date/Publication: 2012-05-23 09:44:27
9f562aa60504f1265daa8ff8095b6333 *DESCRIPTION
09cc0a6268c851dbf582b6b0c49c3cda *DESCRIPTION
50152051f123a389d421aa3130dce252 *NAMESPACE
cf4210b25d2ece355a79cb8ed5e4455a *R/bam.r
f4f5c9bb8776c2248e088c9ee3517208 *R/fast-REML.r
ccbfea150b6066c79b431a94f410f00c *R/bam.r
a868f81bfdea5adc552233d1f77c0bbe *R/fast-REML.r
b160632e8f38fa99470e2f8cba82a495 *R/gam.fit3.r
902657a0ee2dedc3fdfa501bf3b37c5b *R/gam.sim.r
ad57e83090b4633ee50041fd3571c016 *R/gamm.r
a7d790a4fe2640fd69646e1dcf161d80 *R/mgcv.r
0462c2304ecf6ac6392622347b612ccf *R/mgcv.r
5c5f68e76697c356b95e084bee5d7776 *R/plots.r
bb2b4220a103364afc87249157a040b7 *R/smooth.r
b662840dd06dc05bca6930ce0864d3b4 *R/smooth.r
fb66d6c18398411a99ffcb788b854f13 *R/sparse.r
020a4b9253d806cb55b0521412715dc7 *changeLog
366a2457115436852dff53237d746452 *changeLog
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
40874e3ced720a596750f499ded8a60a *data/columb.rda
88d77139cc983317b6acd8c5f1252ab9 *gnugpl2.txt
......@@ -18,7 +18,7 @@ f693920e12f8a6f2b6cab93648628150 *index
5bf12ddc0dab9daae72271b96a15c539 *man/Predict.matrix.Rd
c51c9b8c9c73f81895176ded39b91394 *man/Predict.matrix.cr.smooth.Rd
612ab6354541ebe38a242634d73b66ba *man/Tweedie.Rd
6d711de718a09e1e1ae2a6967abade33 *man/anova.gam.Rd
085090ec00015c13044857b56d7440dc *man/anova.gam.Rd
fa6e8f98dc01de508e95d1008ae84d60 *man/bam.Rd
b385d6d5419d0d6aefe03af1a79d5c4e *man/bam.update.Rd
4e925cb579f4693d1b8ec2d5092c0b37 *man/cSplineDes.Rd
......@@ -32,8 +32,8 @@ f764fb7cb9e63ff341a0075a3854ab5d *man/exclude.too.far.Rd
9ac808f5a2a43cf97f24798c0922c9bf *man/formXtViX.Rd
bb099e6320a6c1bd79fe4bf59e0fde08 *man/formula.gam.Rd
6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd
39b4fa3782cc33a445d34a9b26df44f8 *man/gam.Rd
69c6ef61a3cfc397cbeacd21e5e6cc9b *man/gam.check.Rd
d7781fc09ad742a63746ea92a927eee4 *man/gam.Rd
eb8477a38223fb3d5f94f98e6958ed83 *man/gam.check.Rd
96c9417e4ac5d79ec9ed3f363adfc4e9 *man/gam.control.Rd
fd98327327ba74bb1a61a6519f12e936 *man/gam.convergence.Rd
58ab3b3d6f4fd0d008d73c3c4e6d3305 *man/gam.fit.Rd
......@@ -80,7 +80,7 @@ df63d7045f83a1dc4874fcac18a2303c *man/predict.gam.Rd
a594eb641cae6ba0b83d094acf4a4f81 *man/print.gam.Rd
5311a1e83ae93aef5f9ae38f7492536a *man/qq.gam.Rd
f77ca1471881d2f93c74864d076c0a0e *man/rTweedie.Rd
827743e1465089a859a877942ba2f4a9 *man/random.effects.Rd
354798b41bf766baaa4d0bad85274c25 *man/random.effects.Rd
37669f97e17507f3ae2d6d1d74feb9d7 *man/residuals.gam.Rd
472cc8de5077f5511fe7eb2ad454767e *man/rig.Rd
7258dfc0149fff020054117fd2ee6bd8 *man/s.Rd
......@@ -90,7 +90,7 @@ f77ca1471881d2f93c74864d076c0a0e *man/rTweedie.Rd
9f05449c0114748608f99693ec25cf3f *man/smooth.construct.cr.smooth.spec.Rd
131c3f2131138ba5d6f6bcde4be5ac31 *man/smooth.construct.ds.smooth.spec.Rd
1bb6748d4d2934e48f0572bc5114ffcb *man/smooth.construct.fs.smooth.spec.Rd
e0c041d7ec47290741e82fd71c408995 *man/smooth.construct.mrf.smooth.spec.Rd
772e6c18d25cfaf3d9c194031ee042fe *man/smooth.construct.mrf.smooth.spec.Rd
78688702b6396f24692b74c554483428 *man/smooth.construct.ps.smooth.spec.Rd
d202c6718fb1138fdd99e6102250aedf *man/smooth.construct.re.smooth.spec.Rd
53d7986dd7a54c1edb13ce84dbfe34a2 *man/smooth.construct.sos.smooth.spec.Rd
......@@ -101,7 +101,7 @@ d202c6718fb1138fdd99e6102250aedf *man/smooth.construct.re.smooth.spec.Rd
5ae47a140393009e3dba7557af175170 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
700699103b50f40d17d3824e35522c85 *man/step.gam.Rd
dd54c87fb87c284d3894410f50550047 *man/summary.gam.Rd
05aa756707171bf7581bc8344d0f148f *man/summary.gam.Rd
7f383eaaca246c8bf2d5b74d841f7f8a *man/t2.Rd
04076444b2c99e9287c080298f9dc1d7 *man/te.Rd
c3c23641875a293593fe4ef032b44aae *man/tensor.prod.model.matrix.Rd
......
......@@ -356,7 +356,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
## carry forward scale estimate if possible...
if (scale>0) log.phi <- log(scale) else {
if (iter>1) log.phi <- log(object$scale) else {
if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(G$y)*.05) else
if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(as.numeric(G$y))*.05) else
log.phi <- log(qrx$y.norm2/(nobs+nobs.extra))
}
}
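The change above (and the matching one in bam.fit further down, plus the as.numeric() coercions in fast.REML.fit) guards against a response stored as a one-column matrix: var() applied to a matrix returns a covariance matrix rather than a scalar, which can propagate a 1 x 1 matrix where a plain number is expected. A minimal sketch of the failure mode, with illustrative data only:

## Illustrative only: why var(G$y) misbehaves when the response has class matrix.
y <- matrix(rnorm(10), ncol = 1)  ## response accidentally supplied as a matrix
var(y)                            ## 1 x 1 covariance matrix, not a scalar
var(as.numeric(y))                ## plain scalar, safe for the log.phi guess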
......@@ -395,7 +395,8 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
if (method=="GCV.Cp") {
object <- list()
object$coefficients <- fit$b
object$edf <- post$edf
object$edf <- post$edf
object$edf1 <- post$edf1
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
......@@ -423,8 +424,10 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etas
if (method=="fREML") { ## do expensive cov matrix cal only at end
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale)
object$edf <- res$edf
object$edf1 <- res$edf1
object$hat <- res$hat
object$Vp <- res$V
object$Vp <- res$Vp
object$Ve <- res$Ve
}
if (!conv)
......@@ -567,6 +570,7 @@ bgam.fit2 <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NU
object <- list()
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
......@@ -901,16 +905,16 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
um <- Sl.Xprep(Sl,qrx$R)
lambda.0 <- initial.sp(qrx$R,G$S,G$off)
lsp0 <- log(lambda.0) ## initial s.p.
if (scale<=0) log.phi <- log(var(G$y)*.05) else ## initial phi guess
if (scale<=0) log.phi <- log(var(as.numeric(G$y))*.05) else ## initial phi guess
log.phi <- log(scale)
fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =n,Mp=um$Mp)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale)
object <- list(coefficients=res$beta,edf=res$edf,full.sp = exp(fit$rho.full),
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,full.sp = exp(fit$rho.full),
gcv.ubre=fit$reml,hat=res$hat,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=NULL,Vp=res$V,scale.estimated = scale<=0,outer.info=fit$outer.info,
Ve=res$Ve,Vp=res$Vp,scale.estimated = scale<=0,outer.info=fit$outer.info,
optimizer=c("perf","newton"))
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
......@@ -949,6 +953,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
object <- list()
object$coefficients <- fit$b
object$edf <- post$edf
object$edf1 <- post$edf1
object$full.sp <- fit$sp.full
object$gcv.ubre <- fit$score
object$hat <- post$hat
......
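Per the 1.7-17 changeLog below, bam now returns the frequentist covariance matrix Ve and the alternative effective degrees of freedom edf1 in all cases. A hedged usage sketch (simulated data, illustrative only):

library(mgcv)
set.seed(1)
dat <- gamSim(1, n = 2000, dist = "normal", scale = 2)  ## simulated example data
b <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "fREML")
dim(b$Vp)   ## Bayesian posterior covariance matrix of the coefficients
dim(b$Ve)   ## frequentist covariance matrix, now also returned by bam
b$edf1      ## alternative effective degrees of freedom, based on tr(2A - AA)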
......@@ -569,7 +569,8 @@ fast.REML.fit <- function(Sl,X,y,rho,L=NULL,rho.0=NULL,log.phi=0,phi.fixed=TRUE,
rho.0 <- c(rho.0,0)
L <- rbind(cbind(L,L[,1]*0),c(L[1,]*0,1))
}
grad <- t(L)%*% best$reml1;hess <- t(L)%*% best$reml2%*%L
grad <- as.numeric(t(L)%*% best$reml1)
hess <- t(L)%*% best$reml2%*%L
grad2 <- diag(hess)
## create an index for the unconverged...
......@@ -581,7 +582,7 @@ fast.REML.fit <- function(Sl,X,y,rho,L=NULL,rho.0=NULL,log.phi=0,phi.fixed=TRUE,
for (iter in 1:200) { ## the Newton loop
## Work only with unconverged (much quicker under indefiniteness)
hess <- (t(L)%*% best$reml2%*%L)[uconv.ind,uconv.ind]
grad <- (t(L)%*%best$reml1)[uconv.ind]
grad <- as.numeric(t(L)%*%best$reml1)[uconv.ind]
## check that Hessian is +ve def. Fix if not.
eh <- eigen(hess,symmetric=TRUE)
## flip negative eigenvalues to get +ve def...
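The eigen-decomposition step above makes an indefinite Hessian usable by flipping its negative eigenvalues. A self-contained sketch of that idea (make.pd is a hypothetical helper name, not mgcv code):

## Sketch of the fix: flip negative eigenvalues so the reconstructed
## Hessian is positive definite and the Newton step is a descent direction.
make.pd <- function(hess) {                ## hypothetical helper, not mgcv's
  eh <- eigen(hess, symmetric = TRUE)
  eh$values <- abs(eh$values)              ## flip negative eigenvalues
  eh$vectors %*% (eh$values * t(eh$vectors))
}
H <- matrix(c(2, 0, 0, -1), 2, 2)          ## indefinite test matrix
eigen(make.pd(H))$values                   ## all positive now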
......@@ -616,7 +617,8 @@ fast.REML.fit <- function(Sl,X,y,rho,L=NULL,rho.0=NULL,log.phi=0,phi.fixed=TRUE,
## At this stage the step has been successful.
## Need to test for convergence...
converged <- TRUE
grad <- t(L)%*%trial$reml1;hess <- t(L)%*%trial$reml2%*%L;grad2 <- diag(hess)
grad <- as.numeric(t(L)%*%trial$reml1)
hess <- t(L)%*%trial$reml2%*%L;grad2 <- diag(hess)
## idea in following is only to exclude terms with zero first and second derivative
## from optimization, as it is only these that slow things down if included...
uconv.ind <- (abs(grad) > reml.scale*conv.tol*.1)|(abs(grad2)>reml.scale*conv.tol*.1)
......@@ -754,13 +756,17 @@ Sl.postproc <- function(Sl,fit,undrop,X0,cov=FALSE,scale = -1) {
PP <- matrix(0,np,np)
PP[undrop,undrop] <- Sl.repara(fit$rp,fit$PP,inverse=TRUE)
PP <- Sl.initial.repara(Sl,PP,inverse=TRUE)
XPP <- crossprod(t(X0),PP)*X0
hat <- rowSums(XPP)
edf <- colSums(XPP)
#XPP <- crossprod(t(X0),PP)*X0
#hat <- rowSums(XPP);edf <- colSums(XPP)
XPP <- crossprod(t(X0),PP)
hat <- rowSums(XPP*X0)
F <- crossprod(XPP,X0)
edf <- diag(F)
edf1 <- 2*edf - rowSums(t(F)*F)
## edf <- rowSums(PP*crossprod(X0)) ## diag(PP%*%(t(X0)%*%X0))
if (scale<=0) scale <- fit$rss/(fit$nobs - sum(edf))
V <- PP * scale ## cov matrix
return(list(beta=beta,V=V,edf=edf,hat=hat))
Vp <- PP * scale ## cov matrix
return(list(beta=beta,Vp=Vp,Ve=F%*%Vp,edf=edf,edf1=edf1,hat=hat))
} else return(list(beta=beta))
}
......
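The new edf1 computed above is the per-coefficient version of tr(2A - AA). Note that rowSums(t(F)*F) is just diag(F %*% F) obtained without forming the full matrix product, as a quick numeric check on arbitrary values shows:

## Quick check of the identity used for edf1 above, illustrative values only:
set.seed(2)
F <- matrix(runif(9), 3, 3)
all.equal(rowSums(t(F) * F), diag(F %*% F))        ## TRUE
all.equal(2 * diag(F) - rowSums(t(F) * F),
          diag(2 * F - F %*% F))                   ## TRUE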
** denotes quite substantial/important changes
*** denotes really big changes
1.7-17
** p-values for terms with no un-penalized components were poor. The theory on
which the p-value computation for other terms is based shows why this is,
and allows fixes to be made. These are now implemented.
* summary p value bug fix --- smooths with no null space had a bug in
lower tail of p-value computation, yielding far too low values. Fixed.
* bam now outputs frequentist cov matrix Ve and alternative effective degrees
of freedom edf1, in all cases.
* smoothCon now adjusts null.space.dim on constraint absorption.
* Prediction with matrix arguments (i.e. for models using summation
convention) could be very memory hungry. This in turn meant that
bam could run out of memory when fitting models with such terms.
The problem was memory inefficient handling of duplicate evaluations.
Now fixed by modification of PredictMat.
* bam could fail if the response vector was of class matrix. fixed.
* reduced rank mrf smooths with supplied penalty could use the incorrect
penalty rank when computing the reduced rank basis and fail. fixed
thanks to Fabian Scheipl.
* a cr basis efficiency change could lead to old fitted model objects causing
segfaults when used with current mgcv version. This is now caught.
1.7-16
* There was an uninitialized variable bug in the 1.7-14 re-written "cr" basis
......
......@@ -6,14 +6,13 @@
\description{ Performs hypothesis tests relating to one or more fitted
\code{gam} objects. For a single fitted \code{gam} object, Wald tests of
the significance of each parametric and smooth term are performed. Otherwise
the fitted models are compared using an analysis of deviance table. The tests
are usually approximate, unless the models are un-penalized. Simulation evidence
suggests that best p-value performance results from using ML estimated smoothing parameters.
the fitted models are compared using an analysis of deviance table: this latter approach
should not be used to test the significance of terms which can be penalized
to zero. See details.
}
\usage{
\method{anova}{gam}(object, ..., dispersion = NULL, test = NULL,
freq = FALSE,p.type=0)
freq = TRUE,p.type=0)
\method{print}{anova.gam}(x, digits = max(3, getOption("digits") - 3),...)
}
%- maybe also `usage' for other objects documented here.
......@@ -23,20 +22,30 @@ suggests that best p-value performance results from using ML estimated smoothing
\item{dispersion}{ a value for the dispersion parameter: not normally used.}
\item{test}{what sort of test to perform for a multi-model call. One of
\code{"Chisq"}, \code{"F"} or \code{"Cp"}. }
\item{freq}{whether to use frequentist or Bayesian approximations for single smooth term
\item{freq}{whether to use frequentist or Bayesian approximations for parametric term
p-values. See \code{\link{summary.gam}} for details.}
\item{p.type}{selects exact test statistic to use for single smooth term p-values. See
\code{\link{summary.gam}} for details.}
\item{digits}{number of digits to use when printing output.}
}
\details{ If more than one fitted model is provided then \code{anova.glm} is
used. If only one model is provided then the significance of each model term
is assessed using Wald tests: see \code{\link{summary.gam}} for details of the
actual computations.
In the latter case \code{print.anova.gam} is used as the
printing method. Note that the p-values for smooth terms are approximate only:
simulation evidence suggests that they work best with ML smoothness selection
(REML coming a close second).
used, with the difference in model degrees of freedom being taken as the difference
in effective degrees of freedom. The p-values resulting from this are only approximate,
and must be used with care. The approximation is most accurate when the comparison
relates to unpenalized terms, or smoothers with a null space of dimension greater than zero.
(Basically we require that the difference terms could be well approximated by unpenalized
terms with degrees of freedom approximately the effective degrees of freedom). In simulations the
p-values are usually slightly too low. For terms with a zero-dimensional null space
(i.e. those which can be penalized to zero) the approximation is often very poor, and significance
can be greatly overstated: i.e. p-values are often substantially too low. This case applies to random effect terms.
Note also that in the multi-model call to \code{anova.gam}, it is quite possible for a model with more terms to end up with lower effective degrees of freedom, but better fit, than the notionally null model with fewer terms. In such cases it is very rare that it makes sense to perform any sort of test, since there is then no basis on which to accept the notional null model.
If only one model is provided then the significance of each model term
is assessed using Wald tests: see \code{\link{summary.gam}} for details. The p-values
provided here are better justified than in the multi model case, and have close to the
correct distribution under the null for smooths with a non-zero dimensional null space (i.e. terms that cannot be penalized to zero). ML or REML smoothing parameter selection leads to the best results in simulations as they tend to avoid occasional severe undersmoothing. In the single model case \code{print.anova.gam} is used as the
printing method.
}
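A minimal usage sketch of the two calling modes described above (simulated data, illustrative only):

library(mgcv)
set.seed(0)
dat <- gamSim(1, n = 400, scale = 2)        ## simulated example data
b1 <- gam(y ~ s(x0) + s(x1) + s(x2), data = dat, method = "ML")
b0 <- gam(y ~ s(x0) + s(x1), data = dat, method = "ML")
anova(b1)                  ## single model: Wald tests of each term
anova(b0, b1, test = "F")  ## model comparison: approximate, use with care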
......@@ -54,7 +63,8 @@ which is in fact an object returned from \code{\link{summary.gam}}.
\author{ Simon N. Wood \email{simon.wood@r-project.org} with substantial
improvements by Henric Nilsson.}
\section{WARNING}{ P-values for smooth terms are only approximate.
\section{WARNING}{ If models 'a' and 'b' differ only in terms with no un-penalized components then
p-values from anova(a,b) are unreliable, and usually much too low.
}
\seealso{ \code{\link{gam}}, \code{\link{predict.gam}},
......
......@@ -17,21 +17,9 @@ with smoothing parameters selected by GCV/UBRE/AIC/REML or by regression splines
fixed degrees of freedom (mixtures of the two are permitted). Multi-dimensional smooths are
available using penalized thin plate regression splines (isotropic) or tensor product splines
(when an isotropic smooth is inappropriate). For an overview of the smooths available see \code{\link{smooth.terms}}.
For more on specifying models see \code{\link{gam.models}}, \code{\link{random.effects}} and \code{\link{linear.functional.terms}}.
For more on model selection see \code{\link{gam.selection}}.
For more on specifying models see \code{\link{gam.models}}, \code{\link{random.effects}} and \code{\link{linear.functional.terms}}. For more on model selection see \code{\link{gam.selection}}. Do read \code{\link{gam.check}} and \code{\link{choose.k}}.
\code{gam()} is not a clone of what S-PLUS provides: the major
differences are (i) that by default estimation of the
degree of smoothness of model terms is part of model fitting, (ii) a
Bayesian approach to variance estimation is employed that makes for easier
confidence interval calculation (with good coverage probabilities), (iii) that the model
can depend on any (bounded) linear functional of smooth terms, (iv) the parametric part of the model can be penalized, (v) simple random effects can be incorporated, and
(vi) the facilities for incorporating smooths of more than one variable are
different: specifically there are no \code{lo} smooths, but instead (a) \code{s}
terms can have more than one argument, implying an isotropic smooth and (b) \code{te} smooths are
provided as an effective means for modelling smooth interactions of any
number of variables via scale invariant tensor product smooths. See \link[gam]{gam}
from package \code{gam}, for GAMs via the original Hastie and Tibshirani approach.
See \link[gam]{gam} from package \code{gam}, for GAMs via the original Hastie and Tibshirani approach (see details for differences to this implementation).
For very large datasets see \code{\link{bam}}, for mixed GAM see \code{\link{gamm}} and \code{\link{random.effects}}.
}
......@@ -255,7 +243,21 @@ general linear functionals of smooths, via the summation convention mechanism de
Details of the default underlying fitting methods are given in Wood (2011
and 2004). Some alternative methods are discussed in Wood (2000 and 2006).
}
\code{gam()} is not a clone of Trevor Hastie's original (as supplied in S-PLUS or package \link[gam]{gam}). The major
differences are (i) that by default estimation of the
degree of smoothness of model terms is part of model fitting, (ii) a
Bayesian approach to variance estimation is employed that makes for easier
confidence interval calculation (with good coverage probabilities), (iii) that the model
can depend on any (bounded) linear functional of smooth terms, (iv) the parametric part of the model can be penalized,
(v) simple random effects can be incorporated, and
(vi) the facilities for incorporating smooths of more than one variable are
different: specifically there are no \code{lo} smooths, but instead (a) \code{\link{s}}
terms can have more than one argument, implying an isotropic smooth and (b) \code{\link{te}} or \code{\link{t2}} smooths are
provided as an effective means for modelling smooth interactions of any
number of variables via scale invariant tensor product smooths. Splines on the sphere, Duchon splines
and Gaussian Markov Random Fields are also available. See \link[gam]{gam}
from package \code{gam}, for GAMs via the original Hastie and Tibshirani approach.
}
\references{
......
......@@ -27,12 +27,16 @@ gam.check(b, old.style=FALSE,
\value{A vector of reference quantiles for the residual distribution, if these can be computed.}
\details{ This function plots 4 standard diagnostic plots, some smoothing parameter estimation
\details{ Checking a fitted \code{gam} is like checking a fitted \code{glm}, with two main differences. Firstly,
the basis dimensions used for smooth terms need to be checked, to ensure that they are not so small that they force
oversmoothing: the defaults are arbitrary. \code{\link{choose.k}} provides more detail, but the diagnostic tests described below and reported by this function may also help. Secondly, fitting may not always be as robust to violation of the distributional assumptions as would be the case for a regular GLM, so slightly more care may be needed here. In particular, the theory of quasi-likelihood implies that if the mean variance relationship is OK for a GLM, then other departures from the assumed distribution are not problematic: GAMs can sometimes be more sensitive. For example, un-modelled overdispersion will typically lead to overfit, as the smoothness selection criterion tries to reduce the scale parameter to the one specified. Similarly, it is not clear how sensitive REML and ML smoothness selection will be to deviations from the assumed response distribution. For these reasons this routine uses an enhanced residual QQ plot.
This function plots 4 standard diagnostic plots, some smoothing parameter estimation
convergence information and the results of tests which may indicate if the smoothing basis dimension
for a term is too low.
Usually the 4 plots are various residual plots. For the default optimization methods the convergence information is summarized in a
readable way, but for other optimization methods, whatever is returned by way of
Usually the 4 plots are various residual plots. For the default optimization methods the convergence information is summarized in a readable way, but for other optimization methods, whatever is returned by way of
convergence diagnostics is simply printed.
The test of whether the basis dimension for a smooth is adequate is based on computing an estimate of the residual variance
......
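A minimal usage sketch (simulated data, illustrative only):

library(mgcv)
set.seed(3)
dat <- gamSim(1, n = 400, scale = 2)
b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat)
gam.check(b)  ## QQ and residual plots, convergence info, basis dimension checks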
......@@ -17,7 +17,9 @@ of parametric random effects. It can not be used for models with more coefficien
than \code{gamm} or \code{gamm4}, when the number of random effects is modest.
To facilitate the use of random effects with \code{gam}, \code{\link{gam.vcomp}} is a utility routine for converting
smoothing parameters to variance components. It also provides confidence intervals, if smoothness estimation is by ML or REML.
smoothing parameters to variance components. It also provides confidence intervals, if smoothness estimation is by ML or REML.
Note that treating random effects as smooths does not remove the usual problems associated with testing variance components for equality to zero: see \code{\link{summary.gam}} and \code{\link{anova.gam}}.
}
......
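A hedged sketch of a simple random intercept fitted as a smooth, with the smoothing parameter converted to a variance component (illustrative data):

library(mgcv)
set.seed(4)
g <- factor(rep(1:10, each = 20))            ## grouping factor
y <- rnorm(10)[g] + 0.5 * rnorm(200)         ## random intercepts plus noise
b <- gam(y ~ s(g, bs = "re"), method = "REML")
gam.vcomp(b)  ## smoothing parameter re-expressed as a variance component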
......@@ -88,7 +88,7 @@ Wood S.N. (2006) Generalized additive models: an intriduction with R CRC.
}
\author{ Simon N. Wood \email{simon.wood@r-project.org} and Thomas Kneib
(Fabian Scheipl prorotyped the low rank MRF idea) }
(Fabian Scheipl prototyped the low rank MRF idea) }
\seealso{\code{\link{in.out}}, \code{\link{polys.plot}}}
......
......@@ -7,7 +7,7 @@
summaries from it. (See \code{\link{sink}} to divert output to a file.)
}
\usage{
\method{summary}{gam}(object, dispersion=NULL, freq=FALSE, p.type = 0, ...)
\method{summary}{gam}(object, dispersion=NULL, freq=TRUE, p.type = 0, ...)
\method{print}{summary.gam}(x,digits = max(3, getOption("digits") - 3),
signif.stars = getOption("show.signif.stars"),...)
......@@ -21,14 +21,14 @@ summaries from it. (See \code{\link{sink}} to divert output to a file.)
\item{dispersion}{A known dispersion parameter. \code{NULL} to use estimate or
default (e.g. 1 for Poisson).}
\item{freq}{By default p-values for individual terms are calculated using the Bayesian estimated
covariance matrix of the parameter estimators. If this is set to TRUE then
the frequentist covariance matrix of the parameters is used instead. See details. }
\item{freq}{By default p-values for parametric terms are calculated using the frequentist estimated
covariance matrix of the parameter estimators. If this is set to \code{FALSE} then
the Bayesian covariance matrix of the parameters is used instead. }
\item{p.type}{determines how p-values are computed when \code{freq==FALSE}. 0 uses a test statistic with
\item{p.type}{determines how p-values are computed for smooth terms. 0 uses a test statistic with
distribution determined by the un-rounded edf of the term. 1 uses upwardly biased rounding of the edf and -1
uses a version of the test statistic with a null distribution that has to be simulated. Other options are
poor, generate a warning, and are only of research interest. See details.
uses a version of the test statistic with a null distribution that has to be simulated. 5 is the approximation
in Wood (2006). Other options are poor, generate a warning, and are only of research interest. See details.
}
\item{digits}{controls number of digits printed in output.}
......@@ -45,28 +45,14 @@ freedom.
Let \eqn{ {\bf P}_i}{P_i} be the matrix
giving the parameters of the ith smooth when applied to the data (or pseudodata in the generalized case) and let \eqn{ {\bf X}}{X}
be the design matrix of the model. Then \eqn{ tr({\bf XP}_i )}{tr(XP_i)} is the edf for the ith term. Clearly this
definition causes the edf's to add up properly!
definition causes the edf's to add up properly! An alternative version of EDF is more appropriate for p-value computation, and is based
on the trace of \eqn{ 2{\bf A} - {\bf AA}}{2A - AA}.
\code{print.summary.gam} tries to print various bits of summary information useful for term selection in a pretty way.
If \code{freq=TRUE} then the frequentist approximation for p-values of smooth terms described in section
4.8.5 of Wood (2006) is used. The approximation is not great. If \eqn{ {\bf p}_i}{p_i}
is the parameter vector for the ith smooth term, and this term has estimated
covariance matrix \eqn{ {\bf V}_i}{V_i} then the
statistic is \eqn{ {\bf p}_i^\prime {\bf V}_i^{k-} {\bf
p}_i}{p_i'V_i^{k-}p_i}, where \eqn{ {\bf V}^{k-}_i}{V_i^{k-}} is the rank k
pseudo-inverse of \eqn{ {\bf V_i}}{V_i}, and k is estimated rank of
\eqn{{\bf V_i}}{V_i}. p-values are obtained as follows. In the case of
known dispersion parameter, they are obtained by comparing the chi.sq statistic to the
chi-squared distribution with k degrees of freedom, where k is the estimated
rank of \eqn{ {\bf V_i}}{V_i}. If the dispersion parameter is unknown (in
which case it will have been estimated) the statistic is compared
to an F distribution with k upper d.f. and lower d.f. given by the residual degrees of freedom for the model.
Typically the p-values will be somewhat too low.
If \code{freq=FALSE} then `Bayesian p-values' are returned for the smooth terms, based on a
test statistic motivated
by an extension of Nychka's (1988) analysis of the frequentist properties of Bayesian confidence intervals for smooths.
Unless \code{p.type=5}, p-values for smooth terms are usually based on a
test statistic motivated by an extension of Nychka's (1988) analysis of the frequentist properties
of Bayesian confidence intervals for smooths.
These have better frequentist performance (in terms of power and distribution under the null)
than the alternative strictly frequentist approximation. Let \eqn{\bf f}{f} denote the vector of
values of a smooth term evaluated at the original
......@@ -74,8 +60,7 @@ covariate values and let \eqn{{\bf V}_f}{V_f} denote the corresponding Bayesian
\eqn{{\bf V}_f^{r-}}{V*_f} denote the rank \eqn{r}{r} pseudoinverse of \eqn{{\bf V}_f}{V_f}, where \eqn{r}{r} is the
EDF for the term. The statistic used is then
\deqn{T = {\bf f}^T {\bf V}_f^{r-}{\bf f}}{T = f'V*_f f}
(this can be calculated efficiently without forming the pseudoinverse explicitly). \eqn{T}{T} is compared to a
chi-squared distribution with degrees of freedom given by the EDF for the term,
(this can be calculated efficiently without forming the pseudoinverse explicitly). \eqn{T}{T} is compared to an approximation to an appropriate mixture of chi-squared distributions with degrees of freedom given by the EDF for the term,
or \eqn{T}{T} is used as a component in an F ratio statistic if the
scale parameter has been estimated.
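A rough illustration of the statistic (not mgcv's internal implementation): given the evaluated term f, its Bayesian covariance matrix Vf and the term's EDF r, T can be formed with an eigen-based rank-r pseudoinverse. Here f, Vf and r are assumed available, and pinv.rank is a hypothetical helper name.

## Illustration only; mgcv avoids forming the pseudoinverse explicitly.
pinv.rank <- function(V, r) {            ## hypothetical helper, not mgcv's
  e <- eigen(V, symmetric = TRUE)
  k <- max(1, round(r))                  ## integer rank for this sketch
  e$vectors[, 1:k, drop = FALSE] %*%
    ((1 / e$values[1:k]) * t(e$vectors[, 1:k, drop = FALSE]))
}
## T <- drop(t(f) %*% pinv.rank(Vf, r) %*% f)  ## f, Vf, r assumed given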
......@@ -84,12 +69,32 @@ approximation varying smoothly between the bounding integer rank approximations,
biased rounding of the EDF: values less than .05 above the preceding integer are rounded down, while other values are rounded up. Another option (\code{p.type==-1}) uses a statistic of formal rank given by the number of coefficients for the smooth, but with its terms weighted by the eigenvalues of the covariance matrix, so that penalized terms are down-weighted, but the null distribution requires simulation. Other options for \code{p.type} are 2 (naive rounding), 3 (round up), 4 (numerical rank determination): these are poor options for theoretically known reasons, and will generate a warning.
The resulting p-value also has a Bayesian interpretation:
the probability of observing an \eqn{\bf f}{f} less probable than \eqn{\bf 0}{0}, under the approximation for the posterior for \eqn{\bf f}{f} implied by the truncation used in the test statistic.
the probability of observing an \eqn{\bf f}{f} less probable than \eqn{\bf 0}{0},
under the approximation for the posterior for \eqn{\bf f}{f} implied by the truncation used in the test statistic.
Note that the p-values' distributional approximations start to break down below one effective degree of freedom, and p-values are not reported below 0.5 degrees of freedom.
Note that for terms with no unpenalized components the Nychka (1988) requirement for smoothing bias to be substantially
less than variance breaks down (see e.g. appendix of Marra and Wood, 2012), and this results in incorrect null distribution
for p-values computed using the above approach. In this case it is necessary to fall back on slightly cruder frequentist approximations
(which may overstate significance a little). The frequentist covariance matrix is used in place of the Bayesian version, and the statistic rank is set to 1 for EDF < 1. In the case of random effects, a further modification is required, since the eigen spectrum of the penalty is then flat and a good unpenalized approximation with rank given by the EDF of the term is not generally available, further breaking the theory used for other smoothers. In this case the rank of the test statistic is set to the full rank of the term, and the p-value relates to testing whether the individual random effects were in fact all zero (despite the estimated posterior modes being those observed).
In simulations the p-values have best behaviour under ML smoothness selection, with REML coming second.
If \code{p.type=5} then the frequentist approximation for p-values of smooth terms described in section
4.8.5 of Wood (2006) is used. The approximation is not great. If \eqn{ {\bf p}_i}{p_i}
is the parameter vector for the ith smooth term, and this term has estimated
covariance matrix \eqn{ {\bf V}_i}{V_i} then the
statistic is \eqn{ {\bf p}_i^\prime {\bf V}_i^{k-} {\bf
p}_i}{p_i'V_i^{k-}p_i}, where \eqn{ {\bf V}^{k-}_i}{V_i^{k-}} is the rank k
pseudo-inverse of \eqn{ {\bf V_i}}{V_i}, and k is estimated rank of
\eqn{{\bf V_i}}{V_i}. p-values are obtained as follows. In the case of
known dispersion parameter, they are obtained by comparing the chi.sq statistic to the
chi-squared distribution with k degrees of freedom, where k is the estimated
rank of \eqn{ {\bf V_i}}{V_i}. If the dispersion parameter is unknown (in
which case it will have been estimated) the statistic is compared
to an F distribution with k upper d.f. and lower d.f. given by the residual degrees of freedom for the model.
Typically the p-values will be somewhat too low.
}
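A short usage sketch of the options discussed above (simulated data, illustrative only):

library(mgcv)
set.seed(5)
dat <- gamSim(1, n = 400, scale = 2)
b <- gam(y ~ s(x0) + s(x1), data = dat, method = "ML")
summary(b)               ## default: p.type = 0, un-rounded EDF statistic
summary(b, p.type = 5)   ## older frequentist approximation of Wood (2006)
summary(b, freq = FALSE) ## Bayesian covariance for parametric term p-values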
\value{\code{summary.gam} produces a list of summary information for a fitted \code{gam} object.
......@@ -167,6 +172,10 @@ by scale parameter.}
\references{
Marra, G and S.N. Wood (2012) Coverage Properties of Confidence Intervals for Generalized Additive
Model Components. Scandinavian Journal of Statistics, 39(1), 53-74.
Nychka (1988) Bayesian Confidence Intervals for Smoothing Splines.
Journal of the American Statistical Association 83:1134-1143.
......@@ -177,7 +186,7 @@ and Hall/CRC Press.
\author{ Simon N. Wood \email{simon.wood@r-project.org} with substantial
improvements by Henric Nilsson.}
\section{WARNING }{ The p-values are approximate: do read the details section.
\section{WARNING }{ The p-values are approximate (especially for terms that can be penalized to zero): do read the details section.
}
\seealso{ \code{\link{gam}}, \code{\link{predict.gam}},
......