Commit 63cbeb18 authored by Dirk Eddelbuettel

Import Upstream version 1.3-24

parent 58dc4ce5
Package: mgcv
Version: 1.3-23
Version: 1.3-24
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV smoothness estimation and GAMMs by REML/PQL
......@@ -12,4 +12,4 @@ Imports: graphics, stats
Suggests: nlme (>= 3.1-64), MASS (>= 7.2-2)
LazyLoad: yes
License: GPL version 2 or later
Packaged: Thu Mar 15 15:03:30 2007; simon
Packaged: Sun May 6 21:50:31 2007; simon
......@@ -14,7 +14,7 @@ anova.gam,coef.pdIdnot,coef.pdTens,
logDet.pdIdnot,
initial.sp,logLik.gam,
magic, magic.post.proc, mgcv, mgcv.control, mgcv.find.theta,
mgcv.get.scale, mono.con, mroot, new.name,
mgcv.get.scale, model.matrix.gam, mono.con, mroot, new.name,
notExp,notExp2,notLog,notLog2,pcls,null.space.dimension,
pdConstruct.pdIdnot,pdFactor.pdIdnot,pdMatrix.pdIdnot,pdIdnot,
pdConstruct.pdTens,pdFactor.pdTens,pdMatrix.pdTens,pdTens,
......@@ -54,6 +54,7 @@ S3method(influence, gam)
S3method(cooks.distance, gam)
S3method(formula, gam)
S3method(logLik, gam)
S3method(model.matrix,gam)
S3method(plot, gam)
S3method(predict, gam)
S3method(print, anova.gam)
......
......@@ -659,7 +659,8 @@ smooth.construct.cr.smooth.spec<-function(object,data,knots)
if (is.null(shrink)) {
object$rank<-nk-2
} else object$rank <- nk # penalty rank
object$C <- matrix(oo[[7]],1,nk) # constraint
object$C <- matrix(colSums(object$X),1,ncol(object$X))
## object$C <- matrix(oo[[7]],1,nk) # constraint
object$df<-object$bs.dim-1 # degrees of freedom, given constraint
object$null.space.dim <- 2
object$xp <- oo[[3]] # knot positions
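The new constraint row is just the column sums of the smooth's model matrix, so the identifiability condition becomes "the fitted smooth sums to zero over the observed covariate values" rather than "the coefficients sum to zero". A minimal R sketch of how such a single constraint C b = 0 can be absorbed by reparameterizing onto the null space of C (made-up X; this only illustrates the idea and is not mgcv's internal code):

## Sketch: absorbing a sum-to-zero-over-data constraint C b = 0,
## where C = colSums(X) as in the new "cr" constructor. Hypothetical X.
set.seed(1)
n <- 100; k <- 10
X <- matrix(runif(n * k), n, k)        # stand-in for a smooth's model matrix
C <- matrix(colSums(X), 1, k)          # the new "cr" style constraint row
Z <- qr.Q(qr(t(C)), complete = TRUE)[, -1, drop = FALSE]  # null space of C
XZ <- X %*% Z                          # constrained (k-1 column) model matrix
b.z <- rnorm(k - 1)                    # any unconstrained coefficient vector
sum(X %*% (Z %*% b.z))                 # ~0: the smooth is centred over the data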
......@@ -1356,6 +1357,8 @@ gam.outer <- function(lsp,fscale,family,control,method,gamma,G,...)
printWarn=FALSE,scoreType=criterion,use.svd=control$newton$use.svd,...)
obj <- b$score
object <- b$object
object$GACV <- object$D2 <- object$P2 <- object$UBRE2 <- object$trA2 <-
object$GACV1 <- object$GACV2 <- object$GCV2 <- object$D1 <- object$P1 <- NULL
lsp <- b$lsp
b <- list(conv=b$conv,iter=b$iter,grad=b$grad,hess=b$hess) ## return info
} else { ## methods calling gam.fit2
......@@ -1582,26 +1585,49 @@ gam.check <- function(b)
xlab="linear predictor",ylab="residuals");
hist(residuals(b),xlab="Residuals",main="Histogram of residuals");
plot(fitted(b),b$y,xlab="Fitted Values",ylab="Response",main="Response vs. Fitted Values")
if (b$mgcv.conv$iter>0)
cat("\nSmoothing parameter selection converged after",b$mgcv.conv$iter,"iteration")
else
cat("\nModel required no smoothing parameter selection")
if (b$mgcv.conv$iter>1) cat("s")
if ((fit.method=="mgcv"&&b$mgcv.conv$step.fail)||(b$fit.method=="magic"&&!b$mgcv.conv$fully.converged))
cat(" by steepest\ndescent step failure.\n") else cat(".\n")
if (fit.method=="mgcv")
{ if (length(b$smooth)>1&&b$mgcv.conv$iter>0)
{ cat("The mean absolute",sc.name,"score gradient at convergence was ",mean(abs(b$mgcv.conv$g)),".\n")
if (sum(b$mgcv.conv$e<0)) cat("The Hessian of the",sc.name ,"score at convergence was not positive definite.\n")
else cat("The Hessian of the",sc.name,"score at convergence was positive definite.\n")
## now summarize convergence information
cat("\nfit method:",b$fit.method)
if (!is.null(b$outer.info)) { ## summarize convergence information
if (b$fit.method=="GACV based outer iter. - newton, exact hessian."||
b$fit.method=="deviance based outer iter. - newton, exact hessian.")
{ boi <- b$outer.info
cat("\n",boi$conv," after ",boi$iter," iteration",sep="")
if (boi$iter==1) cat(".") else cat("s.")
cat("\ngradient range [",min(boi$grad),",",max(boi$grad),"] (score ",b$gcv.ubre,
" & scale ",b$sig2,").",sep="")
ev <- eigen(boi$hess)$values
if (min(ev)>0) cat("\nHessian positive definite, ") else cat("\n")
cat("eigenvalue range [",min(ev),",",max(ev),"].\n",sep="")
} else { ## just default print of information...
cat("\n");print(b$outer.info)
}
} else { ## perf iter or AM case
if (b$mgcv.conv$iter==0)
cat("\nModel required no smoothing parameter selection")
else {
cat("\nSmoothing parameter selection converged after",b$mgcv.conv$iter,"iteration")
if (b$mgcv.conv$iter>1) cat("s")
if ((fit.method=="mgcv"&&b$mgcv.conv$step.fail)||(b$fit.method=="magic"&&!b$mgcv.conv$fully.converged))
cat(" by steepest\ndescent step failure.\n") else cat(".\n")
if (fit.method=="mgcv")
{ if (length(b$smooth)>1&&b$mgcv.conv$iter>0)
{ cat("The mean absolute",sc.name,"score gradient at convergence was ",mean(abs(b$mgcv.conv$g)),".\n")
if (sum(b$mgcv.conv$e<0))
cat("The Hessian of the",sc.name ,"score at convergence was not positive definite.\n")
else cat("The Hessian of the",sc.name,"score at convergence was positive definite.\n")
}
if (!b$mgcv.conv$init.ok&&(b$mgcv.conv$iter>0))
cat("Note: the default second smoothing parameter guess failed.\n")
} else
{ cat("The RMS",sc.name,"score gradiant at convergence was",b$mgcv.conv$rms.grad,".\n")
if (b$mgcv.conv$hess.pos.def)
cat("The Hessian was positive definite.\n") else cat("The Hessian was not positive definite.\n")
cat("The estimated model rank was ",b$mgcv.conv$rank,
" (maximum possible: ",b$mgcv.conv$full.rank,")\n",sep="")
}
}
if (!b$mgcv.conv$init.ok&&(b$mgcv.conv$iter>0)) cat("Note: the default second smoothing parameter guess failed.\n")
} else
{ cat("The RMS",sc.name,"score gradiant at convergence was",b$mgcv.conv$rms.grad,".\n")
if (b$mgcv.conv$hess.pos.def)
cat("The Hessian was positive definite.\n") else cat("The Hessian was not positive definite.\n")
cat("The estimated model rank was ",b$mgcv.conv$rank," (maximum possible: ",b$mgcv.conv$full.rank,")\n",sep="")
}
cat("\n")
par(old.par)
......@@ -2048,6 +2074,11 @@ gam.fit <- function (G, start = NULL, etastart = NULL,
}
model.matrix.gam <- function(object,...)
{ if (!inherits(object,"gam")) stop("`object' is not of class \"gam\"")
predict.gam(object,type="lpmatrix",...)
}
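Because model.matrix.gam simply calls predict.gam with type="lpmatrix", the returned matrix times the coefficient vector reproduces the linear predictor. A short hedged illustration (simulated data; names chosen purely for the example):

## Illustration: model.matrix(b) %*% coef(b) recovers the linear predictor.
library(mgcv)
set.seed(2)
x <- runif(100); y <- sin(2 * pi * x) + rnorm(100) * 0.3
b <- gam(y ~ s(x))
X <- model.matrix(b)                   # same as predict(b, type = "lpmatrix")
range(X %*% coef(b) - predict(b))      # essentially zero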
predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
block.size=1000,newdata.guaranteed=FALSE,na.action=na.pass,...)
{
......@@ -2930,8 +2961,16 @@ summary.gam <- function (object, dispersion = NULL, freq = TRUE, ...)
for (i in 1:nt)
{ ind <- object$assign==i
b <- bp[ind];V <- Vb[ind,ind]
pTerms.df[i] <- nb <- length(b)
pTerms.chi.sq[i] <- b%*%solve(V,b)
## pseudo-inverse needed in case of truncation of parametric space
if (length(b)==1) {
V <- 1/V
pTerms.df[i] <- nb <- 1
pTerms.chi.sq[i] <- V*b*b
} else {
V <- pinv(V,length(b),rank.tol=.Machine$double.eps^.5)
pTerms.df[i] <- nb <- attr(V,"rank")
pTerms.chi.sq[i] <- t(b)%*%V%*%b
}
if (!est.disp)
pTerms.pv[i]<-pchisq(pTerms.chi.sq[i],df=nb,lower.tail=FALSE)
else
......
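The summary.gam change above replaces a direct solve() by a pseudo-inverse so that the Wald statistic for a parametric term can still be formed when the fitter has truncated (rank-reduced) the parameter space. A generic sketch of the same idea using an SVD-based pseudo-inverse (illustration only; mgcv uses its own internal pinv()):

## Rank-aware Wald statistic b' V^- b with df = effective rank of V.
pseudo.wald <- function(b, V, tol = .Machine$double.eps^0.5) {
  sv <- svd(V)
  pos <- sv$d > tol * max(sv$d)                      # effective rank of V
  Vinv <- sv$v[, pos, drop = FALSE] %*%
          diag(1 / sv$d[pos], sum(pos)) %*%
          t(sv$u[, pos, drop = FALSE])               # Moore-Penrose inverse
  list(chi.sq = drop(t(b) %*% Vinv %*% b), df = sum(pos))
}
## pseudo.wald(bp[ind], Vb[ind, ind]) mirrors the behaviour of the new branch.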
1.3-24
* summary.gam modified so that it behaves correctly if fitting routines
detect and deal with rank deficiency in the parametric part of a model.
* spring cleaning of help files.
* gam.check modified to report more useful convergence diagnostics.
* `model.matrix.gam' added.
* "cr" basis constructor modified to use the same centering conditions
as other bases (the smooth sums to zero over the covariate values, rather than
its parameters summing to zero). This makes centred confidence intervals for smooths, of
the sort used in plot.gam, behave in a similar way for all bases. With
the old "cr" centering constraint there could be high negative
correlation between coefficients of a centered smooth and the intercept:
this could make centred "cr" smooth CIs wider than CIs for other bases
(not really wrong, but disconcerting).
1.3-23
* step size correction bug fixed in gam.fit3. `Perfect' convergence could
......
......@@ -44,17 +44,9 @@ corresponding \code{\link{Predict.matrix}} method function: see the example code
\references{
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass. 99:637-686
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{Simon N. Wood \email{simon.wood@r-project.org}}
......
......@@ -70,7 +70,12 @@ Pinheiro J.C. and Bates, D.M. (2000) Mixed effects Models in S and S-PLUS. Sprin
For details of how GAMMs are set up here for estimation using \code{lme} see:
Wood, S.N. (2006) Low rank scale invariant tensor product smooths for
Generalized Additive Mixed Models. Biometrics
Generalized Additive Mixed Models. Biometrics 62(4):1025-1036
or
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
\url{http://www.maths.bath.ac.uk/~sw283/}
}
......
......@@ -18,11 +18,12 @@ fix.family.var(fam)
\details{ Outer iteration GAM estimation requires derivatives of the GCV/UBRE
score, which are obtained by differentiating the P-IRLS GAM fitting iteration
w.r.t. the model smoothing parameters. The expressions for the derivatives
require the second derivative of the link w.r.t. the mean, and the first
derivative of the variance function w.r.t. the mean. These functions add
require the second and third derivatives of the link w.r.t. the mean, and the first
and second derivatives of the variance function w.r.t. the mean. These functions add
functions evaluating these quantities to a family.
If the family already has functions \code{dvar} and \code{d2link} then these
If the family already has functions \code{dvar}, \code{d2var}, \code{d2link}
and \code{d3link} then these
functions simply return the family unmodified: this allows non-standard links
to be used with \code{\link{gam}} when using outer iteration (performance
iteration operates with unmodified families).
......@@ -31,18 +32,22 @@ The \code{dvar} function is a function of a mean vector, \code{mu}, and returns
a vector of corresponding first derivatives of the family variance
function. The \code{d2link} function is also a function of a vector of mean
values, \code{mu}: it returns a vector of second derivatives of the link,
evaluated at \code{mu}.
evaluated at \code{mu}. Higher derivatives are defined similarly.
If modifying your own family, note that you can often get away with supplying
only a \code{dvar} function if your family only requires links that occur in
only \code{dvar} and \code{d2var} functions if your family only requires links that occur in
one of the standard families.
}
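As a concrete illustration of the kind of functions these routines attach, the log link (eta = log(mu)) and the Poisson variance function V(mu) = mu have simple closed-form derivatives. The sketch below hand-codes them on a family object; it only shows the calculus, and is not how fix.family.link/fix.family.var are implemented:

## Sketch only: the derivative functions described above, written out for
## the log link and the Poisson variance function.
fam <- poisson()
fam$d2link <- function(mu) -1 / mu^2             # d2 eta / d mu2 for eta = log(mu)
fam$d3link <- function(mu)  2 / mu^3             # d3 eta / d mu3
fam$dvar   <- function(mu) rep(1, length(mu))    # V(mu) = mu, so V'(mu) = 1
fam$d2var  <- function(mu) rep(0, length(mu))    # V''(mu) = 0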
\value{ A family object with extra component functions \code{dvar} and \code{d2link}.
\value{ A family object with extra component functions \code{dvar},
\code{d2var}, \code{d2link} and \code{d3link}.
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
\seealso{
\code{\link{gam.fit3}}}
}
\keyword{models} \keyword{regression}%-- one or more ..
......
......@@ -14,10 +14,10 @@ fixDependence(X1,X2,tol=.Machine$double.eps^.5)
\item{X1}{ A matrix.}
\item{X2}{ A matrix, the columns of which may be partially linearly
dependent on the columns of \code{X1}.}
\item{tol}{The tolernce to use when assessing linear dependence.}
\item{tol}{The tolerance to use when assessing linear dependence.}
}
\details{ The algorithm uses a simple approach based on QR decomposition: see
code for details.
Wood (2006, section 4.10.2) for details.
}
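A rough sketch of the QR-based idea referred to above (illustrative only; the helper below is hypothetical and the package routine may differ in detail): project the columns of X2 onto the column space of X1 and flag those whose residual is numerically zero.

## Sketch: detect columns of X2 lying in the span of the columns of X1.
dep.cols <- function(X1, X2, tol = .Machine$double.eps^0.5) {
  qr1 <- qr(X1)
  R <- X2 - qr.fitted(qr1, X2)          # residuals after projecting onto X1
  rn <- sqrt(colSums(R^2)) / sqrt(colSums(X2^2))
  which(rn < tol)                       # indices of dependent columns of X2
}
X1 <- matrix(rnorm(40), 20, 2)
X2 <- cbind(X1[, 1], rnorm(20))         # first column is exactly dependent
dep.cols(X1, X2)                        # flags column 1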
\value{ An array of the columns of \code{X2} which are linearly dependent on
......@@ -26,6 +26,10 @@ columns of \code{X1}. \code{NULL} if the two matrices are independent.
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
\references{
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
}
\examples{
......
......@@ -36,10 +36,10 @@ For \code{lme} see:
Pinheiro J.C. and Bates, D.M. (2000) Mixed effects Models in S and S-PLUS. Springer
For details of how GAMMs are set up here for estimation using \code{lme} see:
For details of how GAMMs are set up for estimation using \code{lme} see:
Wood, S.N. (2006) Low rank scale invariant tensor product smooths for
Generalized Additive Mixed Models. Biometrics
Generalized Additive Mixed Models. Biometrics 62(4):1025-1036
\url{http://www.maths.bath.ac.uk/~sw283/}
}
......
This diff is collapsed.
......@@ -3,7 +3,8 @@
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Some diagnostics for a fitted gam model}
\description{ Takes a fitted \code{gam} object produced by \code{gam()} and produces some diagnostic information
about the fitting procedure and results.
about the fitting procedure and results. The default is to produce 4 residual
plots, and some information about the convergence of the smoothness selection optimization.
}
\usage{
gam.check(b)
......@@ -13,10 +14,13 @@ gam.check(b)
\item{b}{ a fitted \code{gam} object as produced by \code{gam()}.}
}
\details{ This function plots 4 standard diagnostic plots, and some other
convergence diagnostics. Output differs depending on whether the underlying
fitting method was \code{\link{mgcv}} or another method (see \code{\link{gam.method}}).
convergence diagnostics. Usually the 4 plots are various residual plots. The
printed information relates to the optimization used to select smoothing
parameters. For the default optimization methods the information is summarized in a
readable way, but for other optimization methods, whatever is returned by way of
convergence diagnostics is simply printed.
For \code{\link{mgcv}} based fits , the first plot shows the GCV or UBRE score against model
For \code{\link{mgcv}} based fits (not the default), the first plot shows the GCV or UBRE score against model
degrees of freedom, given the final estimates of the relative smoothing
parameters for the model. This is a slice through the
GCV/UBRE score function that passes through the minimum found during fitting. Although not conclusive (except in the single
......@@ -45,36 +49,18 @@ estimation of the effective degrees of freedom: this usually reflects problems w
In this circumstance reported estimated degrees of freedom can not be trusted, although the fitted model and term estimates are likely to be quite
acceptable.
If the fit method is based on \code{\link{magic}} or \code{\link{gam.fit2}} then there is no global search and the problems with phantom local minima are much reduced. The first plot in this case will simply be a normal QQ plot of the standardized residuals.
The other 3 plots are two residual plots and plot of fitted values against original data.
The function also prints out information about the convergence of the GCV minimization algorithm, indicating how
many iterations were required to minimise the GCV/UBRE score. A message is printed if the minimization terminated by
failing to improve the score with a steepest descent step: otherwise minimization terminated by meeting convergence criteria.
The mean absolute gradient or RMS gradient of the GCV/UBRE function at the minimum is given. An indication of whether or not the Hessian of the GCV/UBRE function is positive definite is given. If some smoothing parameters
are not well defined (effectively zero, or infinite) then it may not be, although this is not usually a problem. If the fit method is \code{\link{mgcv}}, a message is printed
if the second guess smoothing parameters did not improve on the first guess - this is primarily there for the developer.
For other fit methods the estimated rank of the model is printed.
The ideal results from this function have a smooth, single minima GCV/UBRE
plot (where appropriate),
good residual plots, and convergence to small
gradients with a positive definite Hessian. However, failure to meet some of these criteria
is often acceptable, and the information provided is primarily of use in diagnosing suspected problems.
High gradients at convergence are a clear indication of problems, however.
Fuller data can be extracted from the \code{mgcv.conv} part of the \code{gam} object.
If the fit method is based on \code{\link{magic}} or \code{\link{gam.fit2}} or
\code{\link{gam.fit3}} then there is no global search and the problems with
phantom local minima are much reduced. These more recent methods are also much
more robust than the \code{\link{mgcv}} based methods.
}
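A brief usage sketch (simulated data via gamSim, as used in the package's other examples; purely illustrative):

## Fit a model and inspect gam.check's plots and convergence printout.
library(mgcv)
set.seed(0)
dat <- gamSim(1, n = 200, scale = 2)
b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat)
gam.check(b)      # 4 residual plots plus smoothness selection diagnostics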
\references{
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
\url{http://www.maths.bath.ac.uk/~sw283/}
......
......@@ -29,18 +29,18 @@ underlying fitting method is \code{\link{mgcv}} (not the default - see
}
\item{epsilon}{This is used for judging convergence of the GLM IRLS loop in
\code{\link{gam.fit}} or \code{\link{gam.fit2}} .}
\code{\link{gam.fit}}, \code{\link{gam.fit2}} or \code{\link{gam.fit3}}.}
\item{maxit}{Maximum number of IRLS iterations to perform using cautious
GCV/UBRE optimization, after \code{globit} IRLS iterations with normal GCV
optimization have been performed. Note that only fitting based on
\code{\link{mgcv}} (not default) makes
any distinction between cautious and global optimization.}
\item{maxit}{Maximum number of IRLS iterations to perform. When using
performance iteration (not the default) these iterations are in addition to the
\code{globit} initial iterations. If fitting is based on
\code{\link{mgcv}} (not default, even for performance iteration) then GCV/UBRE
optimization will be cautious for these iterations.}
\item{globit}{Maximum number of IRLS iterations to perform with normal
GCV/UBRE optimization. If convergence is not achieved after these
iterations then a further \code{maxit} iterations will be performed
using cautious GCV/UBRE optimization. }
\item{globit}{Maximum number of initial IRLS iterations to perform (with normal
GCV/UBRE optimization) when using performance iteration (only). If convergence
is not achieved after these iterations then a further \code{maxit} iterations
will be performed. Ignored for default outer iteration.}
\item{mgcv.tol}{The convergence tolerance parameter to use in GCV/UBRE optimization.}
......@@ -64,7 +64,8 @@ except those based on \code{\link{mgcv}}).}
parameterization which requires no further constraint. Usually this means that
all the smooths are automatically centered (i.e. they sum to zero over the
covariate values). If \code{FALSE} then the ordinary parameterizations of the
smooths are used, which require constraints to be imposed during fitting.}
smooths are used, which require constraints to be imposed during fitting. Not
all estimation methods can deal with the \code{FALSE} case.}
\item{max.tprs.knots}{This is the default initial maximum number of knots to allow
when constructing a t.p.r.s bases (\code{bs="tp"}). The set up cost (and
......@@ -74,10 +75,10 @@ smoothing basis (or \code{\link{te}} terms), or the approach illustrated in the
examples in \code{\link{gam}}, rather than simply increasing this default.}
\item{nlm}{list of control parameters to pass to \code{\link{nlm}} if this is
used for outer estimation of smoothing parameters. See details.}
used for outer estimation of smoothing parameters (not default). See details.}
\item{optim}{list of control parameters to pass to \code{\link{optim}} if this
is used for outer estimation of smoothing parameters. See details.}
is used for outer estimation of smoothing parameters (not default). See details.}
\item{newton}{list of control parameters to pass to default Newton optimizer
used for outer estimation of log smoothing parameters. See details.}
......@@ -118,7 +119,7 @@ Outer iteration using \code{\link{optim}} is controlled using list
default value 1e7.
When fitting is been done by calls to routine \code{\link{mgcv}},
When fitting is done by calls to routine \code{\link{mgcv}} (not the default),
\code{maxit} and \code{globit} control the maximum iterations of the IRLS algorithm, as follows:
the algorithm will first execute up to
\code{globit} steps in which the GCV/UBRE algorithm performs a global search for the best overall
......
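A hedged example of how these settings are normally supplied (the particular values are arbitrary and for illustration only):

## Passing control settings to gam() via gam.control().
library(mgcv)
ctrl <- gam.control(epsilon = 1e-8, maxit = 30)
set.seed(3)
x <- runif(200); y <- rpois(200, exp(sin(2 * pi * x)))
b <- gam(y ~ s(x), family = poisson, control = ctrl)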
......@@ -23,7 +23,8 @@ There are two things that you can do to speed up GAM fitting. (i) Change the
\code{method} argument to \code{\link{gam}} so that `performance iteration' is
used in place of the default outer iteration. See the \code{perf.magic} option
under \code{\link{gam.method}}, for example. Usually performance iteration
converges well and is quick. (ii) For large datasets it may be worth changing
converges well and it can be quicker than the default outer iteration.
(ii) For large datasets it may be worth changing
the smoothing basis to use \code{bs="cr"} (see \code{\link{s}} for details)
for 1-d smooths, and to use \code{\link{te}} smooths in place of
\code{\link{s}} smooths for smooths of more than one variable. This is because
......
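A small sketch of the basis choices suggested in (ii), on simulated data (sizes and variable names are arbitrary):

## "cr" basis for 1-d smooths, and a tensor product smooth for 2 covariates.
library(mgcv)
set.seed(4)
n <- 2000
x <- runif(n); z <- runif(n)
y <- sin(2 * pi * x) + (z - 0.5)^2 + rnorm(n) * 0.3
b1 <- gam(y ~ s(x, bs = "cr"))          # cubic regression spline basis
b2 <- gam(y ~ te(x, z, bs = "cr"))      # tensor product smooth of x and z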
......@@ -35,6 +35,6 @@ generalized additive models. J. Amer. Statist. Ass. 99:637-686
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
\seealso{ \code{\link{gam.fit2}}, \code{\link{gam}}, \code{\link{mgcv}}, \code{\link{magic}}}
\seealso{ \code{\link{gam.fit3}}, \code{\link{gam.fit2}}, \code{\link{gam}}, \code{\link{mgcv}}, \code{\link{magic}}}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..
......@@ -94,17 +94,18 @@ fitted model, rather than an intermediate used in optimization.}
\item{...}{Other arguments: ignored.}
}
\details{ This routine is basically \code{\link{glm.fit}} with some
\details{ These routines are basically \code{\link{glm.fit}} with some
modifications to allow (i) for quadratic penalties on the log likelihood;
(ii) derivatives of the model coefficients with respect to
log smoothing parameters to be obtained (by updating alongside the P-IRLS
iteration) and (iii) derivatives of the GAM GCV and UBRE scores to be
iteration in the case of \code{gam.fit2}, and by an extra iteration in the
case of \code{gam.fit3}) and (iii) derivatives of the GAM GCV and UBRE scores to be
evaluated at convergence.
In addition the routine applies step halving to any step that increases the
In addition the routines apply step halving to any step that increases the
penalized deviance substantially.
The most costly parts of the calculation are performed by calls to compiled C
The most costly parts of the calculations are performed by calls to compiled C
code (which in turn calls LAPACK routines) in place of the compiled code that
would usually perform least squares estimation on the working model in the
IRLS iteration.
......
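The step halving mentioned above is the usual safeguard: if a proposed coefficient update increases the penalized deviance, the step is repeatedly halved until it no longer does. A generic hedged sketch of the scheme (not mgcv's code; pen.dev stands for any function returning the penalized deviance):

## Generic step-halving: shrink the proposed step until the penalized
## deviance is not increased, or give up after max.half halvings.
step.halve <- function(beta, step, pen.dev, max.half = 20) {
  d0 <- pen.dev(beta)
  for (i in 0:max.half) {
    beta.new <- beta + step / 2^i
    if (pen.dev(beta.new) <= d0) return(beta.new)
  }
  beta                                  # no improving step found
}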
......@@ -110,7 +110,7 @@ occasions when the P-IRLS convergence tolerance is close to being matched
exactly, so that two components of a finite differenced derivative require
different numbers of iterations of P-IRLS in their evaluation. An alternative
is provided in which \code{nlm} uses numerically exact first derivatives, this
is faster and less problematic than the other scheme. An further alternative is to use a quasi-Newton
is faster and less problematic than the other scheme. A further alternative is to use a quasi-Newton
scheme with exact derivatives, based on \code{optim}. In practice this usually
seems to be slower than the \code{nlm} method.
......
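The exact-derivative nlm scheme works because nlm uses a "gradient" attribute attached to the objective's return value in place of finite differencing. A minimal generic illustration with a toy quadratic objective (nothing to do with the GCV score itself):

## Supplying exact gradients to nlm() via the "gradient" attribute.
obj <- function(p) {
  val <- sum((p - c(1, -2))^2)
  attr(val, "gradient") <- 2 * (p - c(1, -2))
  val
}
nlm(obj, p = c(0, 0))$estimate          # converges to (1, -2)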
......@@ -10,13 +10,12 @@ parameters.
This routine optimizes a GCV or UBRE score in this way. Basically the GCV or
UBRE score is evaluated for each trial set of smoothing parameters by
estimating the GAM for those smoothing parameters. The score is minimized
w.r.t. the parameters numerically, using \code{optim} or \code{nlm}. Exact
derivatives of the score can be used by fitting with \code{\link{gam.fit2}},
which improves efficiency and reliability relative to relying solely on finite
w.r.t. the parameters numerically, using \code{newton} (default), \code{optim} or \code{nlm}. Exact
derivatives of the score can be used by fitting with \code{\link{gam.fit2}} or
\code{\link{gam.fit3}} (for exact first and second derivatives). This
improves efficiency and reliability relative to relying on finite
difference derivatives.
Note that there is a choise between basing GCV/UBRE scores on the deviance or
the Pearson statistic: see \code{\link{gam.method}}.
Not normally called directly, but rather a service routine for \code{\link{gam}}.
}
......@@ -34,7 +33,7 @@ gam.outer(lsp,fscale,family,control,method,gamma,G,...)
\item{control}{control argument to pass to \code{\link{gam.fit}} if pure
finite differencing is being used.}
\item{method}{method list reurned from \code{\link{gam.method}}. This defines
\item{method}{method list returned from \code{\link{gam.method}}. This defines
the optimization method to use.}
\item{gamma}{ The degree of freedom inflation factor for the GCV/UBRE score.}
......
......@@ -10,16 +10,9 @@ R routines and compiled C code. For further information on usage see code for
\references{
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
......
......@@ -172,32 +172,18 @@ useful for CI's as smooths are usually biased.}
\references{
Key References on this implementation:
A Key Reference on this implementation:
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood, S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
& Hall/ CRC, Boca Raton, Florida
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood, S.N. (in press) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.
Wood, S.N. (2004) On confidence intervals for GAMs based on penalized
regression splines. Technical Report 04-12 Department of Statistics,
University of Glasgow.
Wood, S.N. (2006) Low rank scale invariant tensor product smooths for
generalized additive mixed models. Biometrics.
Key Reference on GAMs and related models:
Key Reference on GAMs generally:
Hastie (1993) in Chambers and Hastie (1993) Statistical Models in S. Chapman
and Hall.
Hastie and Tibshirani (1990) Generalized Additive Models. Chapman and Hall.
Wahba (1990) Spline Models of Observational Data. SIAM
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
......
......@@ -129,7 +129,8 @@ One advantage of this approach is that it allows correlated errors to be dealt w
or the correlation structures available in the \code{nlme} library.
Some brief details of how GAMs are represented as mixed models and estimated using \code{lme} or \code{glmmPQL} in \code{gamm} can be found in Wood (2004a,b). In addition \code{gamm} obtains a posterior covariance matrix for the parameters of all the fixed effects and the smooth terms. The approach is similar to that described in (Lin & Zhang, 1999) - the covariance matrix of the data (or pseudodata in the generalized case) implied by the weights, correlation and random effects structure is obtained, based on the estimates of the parameters of these terms and this is used to obtain the posterior covariance matrix of the fixed and smooth effects.
Some details of how GAMs are represented as mixed models and estimated using
\code{lme} or \code{glmmPQL} in \code{gamm} can be found in Wood (2004, 2006a,b). In addition \code{gamm} obtains a posterior covariance matrix for the parameters of all the fixed effects and the smooth terms. The approach is similar to that described in (Lin & Zhang, 1999) - the covariance matrix of the data (or pseudodata in the generalized case) implied by the weights, correlation and random effects structure is obtained, based on the estimates of the parameters of these terms and this is used to obtain the posterior covariance matrix of the fixed and smooth effects.
The bases used to represent smooth terms are the same as those used in \code{\link{gam}}.
......@@ -178,14 +179,16 @@ with S. Fourth edition. Springer.
Wahba, G. (1983) Bayesian confidence intervals for the cross validated smoothing spline.
JRSSB 45:133-150
Wood, S.N. (2004a) Stable and efficient multiple smoothing parameter estimation for
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. Journal of the American Statistical Association. 99:673-686
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood, S.N. (2006a) Low rank scale invariant tensor product smooths for
generalized additive mixed models. Biometrics 62(4):1025-1036
Wood, S.N. (2006) Low rank scale invariant tensor product smooths for
Generalized Additive Mixed Models. Biometrics.
Wood S.N. (2006b) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
Wang, Y. (1998) Mixed effects smoothing spline analysis of variance. J.R. Statist. Soc. B 60, 159-174
......
......@@ -12,8 +12,12 @@ by \code{gamm} to set up a generalized additive mixed model in a form suitable f
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. Journal of the American Statistical Association. 99:673-686
Wood, S.N. (2006) Low rank scale invariant tensor product smooths for
Generalized Additive Mixed Models. Biometrics
Wood, S.N. (2006a) Low rank scale invariant tensor product smooths for
generalized additive mixed models. Biometrics 62(4):1025-1036
Wood S.N. (2006b) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
\url{http://www.maths.bath.ac.uk/~sw283/}
......
......@@ -5,8 +5,9 @@
GAMs with GCV smoothness estimation and GAMMs by REML/PQL
}
\description{
\code{mgcv} provides functions for generalized additive modelling and
generalized additive mixed modelling. Particular features of the package are
\code{mgcv} provides functions for generalized additive modelling and
generalized additive mixed modelling (including
variable coefficient models). Particular features of the package are
facilities for automatic smoothness selection, and the provision of a variety
of smooths of more than one variable. User defined smooths are also
supported. A Bayesian approach to confidence/credible interval calculation is
......
\name{model.matrix.gam}
\alias{model.matrix.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Extract model matrix from GAM fit}
\description{Obtains the model matrix from a fitted \code{gam} object.
}
\usage{
\method{model.matrix}{gam}(object, ...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{object}{ fitted model object of class \code{gam} as produced by \code{gam()}.}
\item{...}{ other arguments, passed to \code{\link{predict.gam}}.}
}
\details{Calls \code{\link{predict.gam}} with no \code{newdata} argument and
\code{type="lpmatrix"} in order to obtain the model matrix of \code{object}.
}
\value{ A model matrix.
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
}
\references{
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
}
\seealso{ \code{\link{gam}}}
\examples{
n <- 15
x <- runif(n)
y <- sin(x*2*pi) + rnorm(n)*.2
mod <- gam(y~s(x,bs="cc",k=6),knots=list(x=seq(0,1,length=6)))
model.matrix(mod)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ...
......@@ -133,18 +133,9 @@ Fine control of plots for parametric terms can be obtained by calling
Chambers and Hastie (1993) Statistical Models in S. Chapman & Hall.
Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass. 99:637-686
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
......
......@@ -104,19 +104,8 @@ regions for quantities derived from the model.
Chambers and Hastie (1993) Statistical Models in S. Chapman & Hall.
Gu and Wahba (1991) Minimizing GCV/GML scores with multiple smoothing parameters via
the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass. 99:637-686
\url{http://www.maths.bath.ac.uk/~sw283/}
Wood S.N. (2006b) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
......
......@@ -81,11 +81,13 @@ options spelled out explicitly.}
\references{
Wood, S.N. (2000) Modelling and Smoothing Parameter Estimation
with Multiple Quadratic Penalties. J.R.Statist.Soc.B 62(2):413-428
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
\url{http://www.maths.bath.ac.uk/~sw283/}
}
......