predict.gam.Rd 12.4 KB
Newer Older
1 2 3 4
\name{predict.gam}
\alias{predict.gam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Prediction from fitted GAM model}
5

6 7
\description{ Takes a fitted \code{gam} object produced by \code{gam()} 
and produces predictions given a new set of values for the model covariates 
8 9 10 11 12
or the original values used for the model fit. Predictions can be accompanied
by standard errors, based on the posterior distribution of the model
coefficients. The routine can optionally return the matrix by which the model
coefficients must be pre-multiplied in order to yield the values of the linear predictor at
the supplied covariate values: this is useful for obtaining credible regions
13
for quantities derived from the model (e.g. derivatives of smooths), and for lookup table prediction outside
14
\code{R} (see example code below).}
15

16
\usage{
17
\method{predict}{gam}(object,newdata,type="link",se.fit=FALSE,terms=NULL,
18 19
        exclude=NULL,block.size=NULL,newdata.guaranteed=FALSE,
        na.action=na.pass,unconditional=FALSE,...)
20 21 22 23 24 25
}
%- maybe also `usage' for other objects documented here.
\arguments{ 
  
 \item{object}{ a fitted \code{gam} object as produced by \code{gam()}.
                }
26
 \item{newdata}{ A data frame or list containing the values of the model covariates at which predictions
27 28 29 30 31 32 33 34 35
  are required. If this is not provided then predictions corresponding to the
                original data are returned. If \code{newdata} is provided then
                it should contain all the variables needed for prediction: a
                warning is generated if not. } 
 
\item{type}{ When this has the value \code{"link"} (default) the linear predictor (possibly with
associated standard errors) is returned. When \code{type="terms"} each component of the 
linear predictor is returned seperately (possibly with standard errors): this includes 
parametric model components, followed by each smooth component, but excludes
36 37 38
any offset and any intercept. \code{type="iterms"} is the same, except that any standard errors 
returned for smooth components will include the uncertainty about the intercept/overall mean.  When 
\code{type="response"} predictions 
39 40
on the scale of the response are returned (possibly with approximate
standard errors). When \code{type="lpmatrix"} then a matrix is returned
41 42
which yields the values of the linear predictor (minus any offset) when
postmultiplied by the
43
parameter vector (in this case \code{se.fit} is ignored). The latter
44
option is most useful for getting variance estimates for quantities derived from
45 46 47
the model: for example integrated quantities, or derivatives of smooths. A
linear predictor matrix can also be used to implement approximate prediction
outside \code{R} (see example code, below). }
48 49 50

\item{se.fit}{ when this is TRUE (not default) standard error estimates are returned for each prediction.}

51 52 53
\item{terms}{if \code{type=="terms"} or \code{type="iterms"} then only results for the terms (smooth or parametric) named in this array
will be returned. Otherwise any smooth terms not named in this array will be set to zero. If \code{NULL} then all terms are included.}

54 55
\item{exclude}{if \code{type=="terms"} or \code{type="iterms"} then terms (smooth or parametric) named in this array will not be returned. Otherwise any smooth terms named in this array will be set to zero. 
If \code{NULL} then no terms are excluded. Note that this is the term names as it appears in the model summary, see example.}
56 57 58

\item{block.size}{maximum number of predictions to process per call to underlying
code: larger is quicker, but more memory intensive. Set to < 1 to use total number
59 60
of predictions as this. If \code{NULL} then block size is 1000 if new data supplied, 
and the number of rows in the model frame otherwise. }
61 62 63

\item{newdata.guaranteed}{Set to \code{TRUE} to turn off all checking of
\code{newdata} except for sanity of factor levels: this can speed things up
64 65
for large prediction tasks, but \code{newdata} must be complete, with no
\code{NA} values for predictors required in the model. }
66

67 68 69 70 71 72 73 74
\item{na.action}{what to do about \code{NA} values in \code{newdata}. With the
default \code{na.pass}, any row of \code{newdata} containing \code{NA} values
for required predictors, gives rise to \code{NA} predictions (even if the term concerned has no
\code{NA} predictors). \code{na.exclude} or \code{na.omit} result in the
dropping of \code{newdata} rows, if they contain any \code{NA} values for
required predictors. If \code{newdata} is missing then \code{NA} handling is 
determined from \code{object$na.action}.}

75 76 77 78
\item{unconditional}{if \code{TRUE} then the smoothing parameter uncertainty corrected covariance
matrix is used, when available, otherwise the covariance matrix conditional on the estimated 
smoothing parameters is used. }

79
\item{...}{ other arguments.}
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108

}

\value{ If \code{type=="lpmatrix"} then a matrix is returned which will
  give a vector of linear predictor values (minus any offest) at the supplied covariate
  values, when applied to the model coefficient vector. 
Otherwise, if \code{se.fit} is \code{TRUE} then a 2 item list is returned with items (both arrays) \code{fit}
and \code{se.fit} containing predictions and associated standard error estimates, otherwise an 
array of predictions is returned. The dimensions of the returned arrays depends on whether 
\code{type} is \code{"terms"} or not: if it is then the array is 2 dimensional with each 
term in the linear predictor separate, otherwise the array is 1 dimensional and contains the 
linear predictor/predicted values (or corresponding s.e.s). The linear predictor returned termwise will 
not include the offset or the intercept.

\code{newdata} can be a data frame, list or model.frame: if it's a model frame
then all variables must be supplied.

}

\details{The standard errors produced by \code{predict.gam} are based on the
Bayesian posterior covariance matrix of the parameters \code{Vp} in the fitted
gam object. 

To facilitate plotting with \code{\link{termplot}}, if \code{object} possesses
an attribute \code{"para.only"} and \code{type=="terms"} then only parametric
terms of order 1 are returned (i.e. those that \code{termplot} can handle).

Note that, in common with other prediction functions, any offset supplied to
\code{\link{gam}} as an argument is always ignored when predicting, unlike
109 110 111 112
offsets specified in the gam model formula.

See the examples for how to use the \code{lpmatrix} for obtaining credible
regions for quantities derived from the model. 
113 114 115 116 117 118
}

\references{

Chambers and Hastie (1993) Statistical Models in S. Chapman & Hall.

119 120 121
Marra, G and S.N. Wood (2012) Coverage Properties of Confidence Intervals for Generalized Additive
Model Components. Scandinavian Journal of Statistics, 39(1), 53-74.

122 123
Wood S.N. (2006b) Generalized Additive Models: An Introduction with R. Chapman
and Hall/CRC Press.
124
}
125
\author{ Simon N. Wood \email{simon.wood@r-project.org} 
126 127 128 129 130 131

The design is inspired by the S function of the same name described in
Chambers and Hastie (1993) (but is not a clone).

}

132 133 134 135 136
\section{WARNING }{
Predictions are likely to be incorrect if data dependent transformations of the covariates
are used within calls to smooths. See examples.

Note that the behaviour of this function is not identical to 
137 138
\code{predict.gam()} in Splus.

139 140
\code{type=="terms"} does not exactly match what \code{predict.lm} does for
parametric model components.
141 142 143 144 145 146 147
} 

\seealso{  \code{\link{gam}}, \code{\link{gamm}}, \code{\link{plot.gam}}}

\examples{
library(mgcv)
n<-200
148
sig <- 2
149 150 151 152
dat <- gamSim(1,n=n,scale=sig)

b<-gam(y~s(x0)+s(I(x1^2))+s(x2)+offset(x3),data=dat)

153 154
newd <- data.frame(x0=(0:30)/30,x1=(0:30)/30,x2=(0:30)/30,x3=(0:30)/30)
pred <- predict.gam(b,newd)
155
pred0 <- predict(b,newd,exclude="s(x0)") ## prediction excluding a term
156

157
#############################################
158
## difference between "terms" and "iterms"
159
#############################################
160 161 162 163
nd2 <- data.frame(x0=c(.25,.5),x1=c(.25,.5),x2=c(.25,.5),x3=c(.25,.5))
predict(b,nd2,type="terms",se=TRUE)
predict(b,nd2,type="iterms",se=TRUE)

164
#########################################################
165
## now get variance of sum of predictions using lpmatrix
166
#########################################################
167

168
Xp <- predict(b,newd,type="lpmatrix") 
169

170
## Xp \%*\% coef(b) yields vector of predictions
171

172
a <- rep(1,31)
173 174
Xs <- t(a) \%*\% Xp ## Xs \%*\% coef(b) gives sum of predictions
var.sum <- Xs \%*\% b$Vp \%*\% t(Xs)
175

176 177

#############################################################
178 179
## Now get the variance of non-linear function of predictions
## by simulation from posterior distribution of the params
180
#############################################################
181

182 183 184 185 186 187
rmvn <- function(n,mu,sig) { ## MVN random deviates
  L <- mroot(sig);m <- ncol(L);
  t(mu + L\%*\%matrix(rnorm(m*n),m,n)) 
}

br <- rmvn(1000,coef(b),b$Vp) ## 1000 replicate param. vectors
188 189
res <- rep(0,1000)
for (i in 1:1000)
190
{ pr <- Xp \%*\% br[i,] ## replicate predictions
191 192 193
  res[i] <- sum(log(abs(pr))) ## example non-linear function
}
mean(res);var(res)
194

195
## loop is replace-able by following .... 
196

197
res <- colSums(log(abs(Xp \%*\% t(br))))
198

199 200 201 202
##################################################################
# illustration of unsafe scale dependent transforms in smooths....
##################################################################

203
b0 <- gam(y~s(x0)+s(x1)+s(x2)+x3,data=dat) ## safe
204 205 206 207
b1 <- gam(y~s(x0)+s(I(x1/2))+s(x2)+scale(x3),data=dat) ## safe
b2 <- gam(y~s(x0)+s(scale(x1))+s(x2)+scale(x3),data=dat) ## unsafe
pd <- dat; pd$x1 <- pd$x1/2; pd$x3 <- pd$x3/2
par(mfrow=c(1,2))
208 209 210 211
plot(predict(b0,pd),predict(b1,pd),main="b0 and b1 predictions match")
abline(0,1,col=2)
plot(predict(b0,pd),predict(b2,pd),main="b2 unsafe, doesn't match")
abline(0,1,col=2)
212 213


214
##################################################################
215 216 217 218 219 220 221 222 223 224
## The following shows how to use use an "lpmatrix" as a lookup 
## table for approximate prediction. The idea is to create 
## approximate prediction matrix rows by appropriate linear 
## interpolation of an existing prediction matrix. The additivity 
## of a GAM makes this possible. 
## There is no reason to ever do this in R, but the following 
## code provides a useful template for predicting from a fitted 
## gam *outside* R: all that is needed is the coefficient vector 
## and the prediction matrix. Use larger `Xp'/ smaller `dx' and/or 
## higher order interpolation for higher accuracy.  
225
###################################################################
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243

xn <- c(.341,.122,.476,.981) ## want prediction at these values
x0 <- 1         ## intercept column
dx <- 1/30      ## covariate spacing in `newd'
for (j in 0:2) { ## loop through smooth terms
  cols <- 1+j*9 +1:9      ## relevant cols of Xp
  i <- floor(xn[j+1]*30)  ## find relevant rows of Xp
  w1 <- (xn[j+1]-i*dx)/dx ## interpolation weights
  ## find approx. predict matrix row portion, by interpolation
  x0 <- c(x0,Xp[i+2,cols]*w1 + Xp[i+1,cols]*(1-w1))
}
dim(x0)<-c(1,28) 
fv <- x0\%*\%coef(b) + xn[4];fv    ## evaluate and add offset
se <- sqrt(x0\%*\%b$Vp\%*\%t(x0));se ## get standard error
## compare to normal prediction
predict(b,newdata=data.frame(x0=xn[1],x1=xn[2],
        x2=xn[3],x3=xn[4]),se=TRUE)

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
####################################################################
## Differentiating the smooths in a model (with CIs for derivatives)
####################################################################

## simulate data and fit model...
dat <- gamSim(1,n=300,scale=sig)
b<-gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat)
plot(b,pages=1)

## now evaluate derivatives of smooths with associated standard 
## errors, by finite differencing...
x.mesh <- seq(0,1,length=200) ## where to evaluate derivatives
newd <- data.frame(x0 = x.mesh,x1 = x.mesh, x2=x.mesh,x3=x.mesh)
X0 <- predict(b,newd,type="lpmatrix") 

eps <- 1e-7 ## finite difference interval
x.mesh <- x.mesh + eps ## shift the evaluation mesh
newd <- data.frame(x0 = x.mesh,x1 = x.mesh, x2=x.mesh,x3=x.mesh)
X1 <- predict(b,newd,type="lpmatrix")

Xp <- (X1-X0)/eps ## maps coefficients to (fd approx.) derivatives
colnames(Xp)      ## can check which cols relate to which smooth

par(mfrow=c(2,2))
for (i in 1:4) {  ## plot derivatives and corresponding CIs
  Xi <- Xp*0 
  Xi[,(i-1)*9+1:9+1] <- Xp[,(i-1)*9+1:9+1] ## Xi\%*\%coef(b) = smooth deriv i
  df <- Xi\%*\%coef(b)              ## ith smooth derivative 
  df.sd <- rowSums(Xi\%*\%b$Vp*Xi)^.5 ## cheap diag(Xi\%*\%b$Vp\%*\%t(Xi))^.5
  plot(x.mesh,df,type="l",ylim=range(c(df+2*df.sd,df-2*df.sd)))
  lines(x.mesh,df+2*df.sd,lty=2);lines(x.mesh,df-2*df.sd,lty=2)
}


278

279 280 281
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..