Commit 1f187945 authored by Dirk Eddelbuettel

Import Upstream version 1.7-3

parent c7eaaa75
Package: mgcv
Version: 1.7-2
Version: 1.7-3
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
......@@ -12,6 +12,6 @@ Imports: graphics, stats, nlme, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix
LazyLoad: yes
License: GPL (>= 2)
Packaged: 2010-11-12 10:50:02 UTC; simon
Packaged: 2011-02-14 18:14:10 UTC; simon
Repository: CRAN
Date/Publication: 2010-11-13 15:19:37
Date/Publication: 2011-02-14 19:21:19
......@@ -20,27 +20,32 @@ export(anova.gam, bam, bam.update, concurvity, cSplineDes,
print.gam,print.summary.gam,predict.gam,
PredictMat,Predict.matrix,Predict.matrix2,
Predict.matrix.cr.smooth,
Predict.matrix.duchon.spline,
Predict.matrix.cs.smooth,
Predict.matrix.cyclic.smooth,
Predict.matrix.tensor.smooth,
Predict.matrix.tprs.smooth,
Predict.matrix.ts.smooth,
Predict.matrix.sos.smooth,
Predict.matrix.mrf.smooth,
Predict.matrix.pspline.smooth,
Predict.matrix.random.effect,
qq.gam,
residuals.gam,rig,rTweedie, s,
slanczos,
smoothCon,smooth.construct,smooth.construct2,
smooth.construct.cc.smooth.spec,
smooth.construct.cp.smooth.spec,
smooth.construct.cr.smooth.spec,
smooth.construct.cs.smooth.spec,
smooth.construct.ds.smooth.spec,
smooth.construct.tensor.smooth.spec,
smooth.construct.tp.smooth.spec,
smooth.construct.ts.smooth.spec,
smooth.construct.ps.smooth.spec,
smooth.construct.re.smooth.spec,
smooth.construct.mrf.smooth.spec,
smooth.construct.sos.smooth.spec,
smooth.construct.ad.smooth.spec,
summary.gam,sp.vcov,
t2,te,tensor.prod.model.matrix,tensor.prod.penalties,
......
......@@ -51,7 +51,11 @@ mini.mf <-function(mf,chunk.size) {
## basis setup.
n <- nrow(mf)
if (n<=chunk.size) return(mf)
seed <- get(".Random.seed", envir = .GlobalEnv)
seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
if (inherits(seed,"try-error")) {
runif(1)
seed <- get(".Random.seed",envir=.GlobalEnv)
}
kind <- RNGkind(NULL)
RNGkind("default", "default")
set.seed(66)
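Aside: the try() guard above is needed because .Random.seed only exists once the RNG has been used at least once in the session. A minimal sketch of the full save/set/restore idiom this hunk implements (the restore step lies outside the visible lines; the sketch assumes it mirrors the save):

seed <- try(get(".Random.seed", envir = .GlobalEnv), silent = TRUE)
if (inherits(seed, "try-error")) { # no seed yet: touch the RNG to create one
  runif(1)
  seed <- get(".Random.seed", envir = .GlobalEnv)
}
kind <- RNGkind(NULL)                # remember the current generator kinds
RNGkind("default", "default")
set.seed(66)                         # fixed, repeatable subsampling stream
## ... draw the random subsample of mf here ...
RNGkind(kind[1], kind[2])            # restore the generator
assign(".Random.seed", seed, envir = .GlobalEnv) # restore the caller's seed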
......@@ -163,7 +167,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
w <- sqrt(w)
if (b == 1) qrx <- qr.update(w*X[good,],w*z)
else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2)
gc()
rm(X);gc() ## X can be large: remove and reclaim
}
G$n <- nobs
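The qr.update() calls above absorb each chunk of the model matrix into a running QR factorization, which is what keeps memory use bounded. A minimal sketch of the idea (qr_block_update is a hypothetical stand-in, not mgcv's actual qr.update):

qr_block_update <- function(Xb, yb, R = NULL, f = NULL) {
  ## stack the previous R factor on top of the new weighted block and
  ## re-triangularize; f accumulates Q'y for the final backsolve
  if (!is.null(R)) { Xb <- rbind(R, Xb); yb <- c(f, yb) }
  qrx <- qr(Xb)
  list(R = qr.R(qrx), f = qr.qty(qrx, yb)[1:ncol(Xb)])
}
## after the last chunk, backsolve(R, f) gives the least squares coefficients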
......@@ -202,6 +206,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
object <- gam(G=G,method=method,gamma=gamma,scale=scale)
y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
}
gc()
if (method=="GCV.Cp") {
object <- list()
......@@ -251,6 +256,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
# else linkinv(offset)
# nulldev <- sum(dev.resids(y, wtdmu, weights))
object$wt <- wt
rm(G);gc()
object
} ## end bgam.fit
......@@ -320,7 +326,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0)
}
qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2)
gc()
rm(X);gc() ## X can be large: remove and reclaim
}
G$n <- n
G$y <- mf[[gp$response]]
......@@ -338,6 +344,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0)
y <- rwMatrix(stop,row,weight,sqrt(G$w)*G$y)
qrx <- qr.update(X,y)
}
rm(X);gc() ## X can be large: remove and reclaim
}
rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
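The rwMatrix() weighting above is what implements bam's AR1 residual model; from the user side it is driven by the rho argument (Gaussian additive case only — rho triggers a warning and is ignored for generalized fits, as the code further below shows). A hedged usage sketch on simulated data:

library(mgcv)
set.seed(6)
dat <- gamSim(1, n = 2000, dist = "normal")  # simulated example data
b <- bam(y ~ s(x0) + s(x1) + s(x2), data = dat, rho = 0.6)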
......@@ -364,7 +371,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0)
object$gcv.ubre <- object$gcv.ubre - (n-1)*log(ld)
}
}
gc()
if (method=="GCV.Cp") {
object <- list()
object$coefficients <- fit$b
......@@ -398,7 +405,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0)
bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action=na.omit,
offset=NULL,method="REML",control=gam.control(),scale=0,gamma=1,knots=NULL,
offset=NULL,method="REML",control=list(...),scale=0,gamma=1,knots=NULL,
sp=NULL,min.sp=NULL,paraPen=NULL,chunk.size=10000,rho=0,...)
## Routine to fit an additive model to a large dataset. The model is stated in the formula,
......@@ -406,7 +413,7 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
## parametric terms.
## This is a modification of `gam' designed to build the QR decomposition of the model matrix
## up in chunks, to keep memory costs down.
{ require(mgcv)
{ control <- do.call("gam.control",control)
if (is.character(family))
family <- eval(parse(text = family))
if (is.function(family))
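With this change the caller passes a plain list containing only the control values to override; do.call("gam.control", control) fills in the remaining defaults (gamm does the same with lmeControl). A hedged usage sketch:

library(mgcv)
set.seed(2)
dat <- gamSim(1, n = 400, dist = "normal")  # simulated example data
b <- bam(y ~ s(x0) + s(x1), data = dat,
         control = list(trace = TRUE))      # only trace overridden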
......@@ -487,17 +494,23 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
## now build up proper model matrix, and deal with y, w, and offset...
if (control$trace) cat("Setup complete. Calling fit\n")
colnamesX <- colnames(G$X)
if (am) {
if (nrow(mf)>chunk.size) G$X <- matrix(0,0,ncol(G$X)); gc()
object <- bam.fit(G,mf,chunk.size,gp,scale,gamma,method,rho=rho)
} else {
G$X <- matrix(0,0,ncol(G$X)); gc()
if (rho!=0) warning("AR1 parameter rho unused with generalized model")
object <- bgam.fit(G, mf, chunk.size, gp ,scale ,gamma,method=method,
control = control,...)
}
gc()
if (control$trace) cat("Fit complete. Finishing gam object.\n")
if (scale < 0) { object$scale.estimated <- TRUE;object$scale <- object$scale.est} else {
......@@ -530,7 +543,7 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
object$model <- mf;rm(mf);gc()
object$na.action <- attr(object$model,"na.action") # how to deal with NA's
object$nsdf <- G$nsdf
names(object$coefficients)[1:G$nsdf] <- colnames(G$X)[1:G$nsdf]
names(object$coefficients)[1:G$nsdf] <- colnamesX[1:G$nsdf]
object$offset <- G$offset
object$prior.weights <- G$w
object$pterms <- G$pterms
......@@ -545,7 +558,8 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
object$y <- object$model[[gp$response]]
object$NA.action <- na.action ## version to use in bam.update
gc()
rm(G);gc()
## note that predict.gam assumes that it must be ok not to split the
## model frame, if no new data supplied, so need to supply explicitly
object$linear.predictors <- as.numeric(predict.gam(object,newdata=object$model,block.size=chunk.size))
......
......@@ -10,9 +10,10 @@ gam.reparam <- function(rS,lsp,deriv)
## Finds an orthogonal reparameterization which avoids `dominant machine zero leakage' between
## components of the square root penalty.
## rS is the list of the square root penalties: last entry is root of fixed.
## penalty, if fixed.penalty=TRUE.
## penalty, if fixed.penalty=TRUE (i.e. length(rS)>length(sp))
## lsp is the vector of log smoothing parameters.
## *Assumption* here is that rS[[i]] are in a null space of total penalty already
## *Assumption* here is that rS[[i]] are in a null space of total penalty already;
## see e.g. totalPenaltySpace & mini.roots
## Outputs:
## S -- the total penalty matrix similarity transformed for stability
## rS -- the component square roots, transformed in the same way
......@@ -1971,7 +1972,9 @@ fix.family.ls<-function(fam)
return(fam)
}
if (family=="quasi"||family=="quasipoisson"||family=="quasibinomial") {
fam$ls <- function(y,w,n,scale) rep(0,3)
## fam$ls <- function(y,w,n,scale) rep(0,3)
## Uses extended quasi-likelihood form...
fam$ls <- function(y,w,n,scale) c(-sum(w)*log(scale)/2,-sum(w)/(2*scale),sum(w)/(2*scale*scale))
return(fam)
}
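The replacement vector looks like the saturated-likelihood term of the extended quasi-likelihood together with its first two derivatives with respect to the scale parameter (an inference from the pattern, not stated in the diff). A quick finite-difference check of that reading:

ls0 <- function(w, scale) -sum(w) * log(scale) / 2      # first component
w <- rep(1, 10); scale <- 2; eps <- 1e-6
(ls0(w, scale + eps) - ls0(w, scale - eps)) / (2 * eps) # ~ -sum(w)/(2*scale)
(ls0(w, scale + eps) - 2 * ls0(w, scale) +
   ls0(w, scale - eps)) / eps^2                         # ~ sum(w)/(2*scale^2)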
if (family=="inverse.gaussian") {
......
......@@ -887,7 +887,7 @@ gammPQL <- function (fixed, random, family, data, correlation, weights,
gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=list(),weights=NULL,
subset=NULL,na.action,knots=NULL,control=nlme::lmeControl(niterEM=0,optimMethod="L-BFGS-B"),
subset=NULL,na.action,knots=NULL,control=list(niterEM=0,optimMethod="L-BFGS-B"), ## nlme::lmeControl(niterEM=0,optimMethod="L-BFGS-B"),
niterPQL=20,verbosePQL=TRUE,method="ML",...)
## NOTE: niterEM modified after changed notLog parameterization - old version
## needed niterEM=3. 10/8/05.
......@@ -896,8 +896,9 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
# random terms. correlation describes the correlation structure. This routine is basically an interface
# between the bases constructors provided in mgcv and the glmmPQL routine used to estimate the model.
# NOTE: need to fill out the gam object properly
{ if (!require("nlme")) stop("gamm() requires package nlme to be installed")
{
if (!require("nlme")) stop("gamm() requires package nlme to be installed")
control <- do.call("lmeControl",control)
# if (!require("MASS")) stop("gamm() requires package MASS to be installed")
# check that random is a named list
if (!is.null(random))
......@@ -1181,6 +1182,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
object$weights<-object$prior.weights
if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre ## column centering values
ret$gam<-object
ret
......
## R routines for the package mgcv (c) Simon Wood 2000-2010
## With contributions from Henric Nilsson
......@@ -17,6 +16,28 @@ Rrank <- function(R,tol=.Machine$double.eps^.9) {
rank
}
slanczos <- function(A,k=10,kl=-1) {
## Computes truncated eigen decomposition of symmetric A by
## Lanczos iteration. If kl < 0 then k largest magnitude
## eigenvalues returned, otherwise k highest and kl lowest.
## Eigenvectors are always returned too.
## set.seed(1);n <- 1000;A <- matrix(runif(n*n),n,n);A <- A+t(A);er <- slanczos(A,10)
## um <- eigen(A,symmetric=TRUE);ind <- c(1:5,(n-5+1):n)
## range(er$values-um$values[ind]);range(abs(er$vectors)-abs(um$vectors[,ind]))
## It seems that when k (or k+kl) is beyond 10-15% of n
## then you might as well use eigen(A,symmetric=TRUE), but the
## extra cost is the expensive accumulation of eigenvectors.
## Should re-write whole thing using LAPACK routines for eigenvectors.
k <- round(k);kl <- round(kl)
m <- k + max(0,kl)
n <- nrow(A)
if (n != ncol(A)) stop("A not square")
if (m<1) return(list(values=rep(0,0),vectors=matrix(0,n,0),iter=0))
oo <- .C(C_Rlanczos,A=as.double(A),U=as.double(rep(0,n*m)),D=as.double(rep(0,m)),
n=as.integer(n),m=as.integer(k),ml=as.integer(kl))
list(values = oo$D,vectors = matrix(oo$U,n,m),iter=oo$n)
}
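The check sketched in the comments above, reformatted so it can be pasted straight into a session:

library(mgcv)
set.seed(1)
n <- 1000
A <- matrix(runif(n * n), n, n); A <- A + t(A)   # symmetric test matrix
er <- slanczos(A, 10)                            # 10 largest-magnitude pairs
um <- eigen(A, symmetric = TRUE)
ind <- c(1:5, (n - 5 + 1):n)
range(er$values - um$values[ind])                # should be tiny
range(abs(er$vectors) - abs(um$vectors[, ind]))  # signs may flip, hence abs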
rig <- function(n,mean,scale) {
## inverse gaussian deviates generated by algorithm 5.7 of
## Gentle, 2003. scale = 1/lambda.
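For an inverse Gaussian with mean mu and rate lambda, the variance is mu^3/lambda, i.e. mu^3*scale in the parameterization the comment implies; a quick moment sanity check under that assumption:

library(mgcv)
set.seed(3)
x <- rig(1e5, mean = 2, scale = 0.5)
mean(x)  # ~ 2
var(x)   # ~ 2^3 * 0.5 = 4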
......@@ -751,12 +772,30 @@ gam.setup <- function(formula,pterms,data=stop("No data supplied to gam.setup"),
G$smooth[[i]] <- sm[[i]]
}
## Now test if intercept in span of parametric, and centre all smooth
## columns if it is....
## I don't think that this is legitimate --- some smooths have no null
## space, in which case this trick alters the fit...
# G$Xcentre <- NULL
# if (G$nsdf>0) {
# qrx <- qr(X[,1:G$nsdf,drop=FALSE])
# one <- rep(1,nrow(X))
# if (max(abs(one-qr.qy(qrx,qr.qty(qrx,one)))) < .Machine$double.eps^.75 ) {
# G$Xcentre <- colMeans(X)
# G$Xcentre[1:G$nsdf] <- 0
# if (max(G$Xcentre^2)<.Machine$double.eps^.75) G$Xcentre <- NULL else
# X <- sweep(X,2,G$Xcentre)
# } else G$Xcentre <- NULL
# }
if (is.null(Xp)) {
G$cmX <- colMeans(X) ## useful for componentwise CI construction
} else {
G$cmX <- colMeans(Xp)
## transform from fit params to prediction params...
G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!
## G$P <- qr.coef(qr(Xp),X) ## old code assumes always full rank!!
qrx <- qr(Xp,LAPACK=TRUE)
R <- qr.R(qrx)
......@@ -776,7 +815,8 @@ gam.setup <- function(formula,pterms,data=stop("No data supplied to gam.setup"),
}
G$P[qrx$pivot,] <- G$P
}
G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here
if (G$nsdf>0) G$cmX[-(1:G$nsdf)] <- 0 ## zero the smooth parts here
else G$cmX <- G$cmX * 0
G$X <- X;rm(X)
n.p <- ncol(G$X)
# deal with penalties
......@@ -1253,7 +1293,7 @@ estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,...) {
reml <- TRUE
} else reml <- FALSE ## experimental insert
Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X))
G$Eb <- Ssp$E ## balanced penalty square root for rank determination purrposes
G$Eb <- Ssp$E ## balanced penalty square root for rank determination purposes
G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis
G$Mp <- ncol(Ssp$Z) ## null space dimension
G$UrS <- list() ## need penalty matrices in overall penalty range space...
......@@ -1275,12 +1315,13 @@ estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,...) {
else G$sig2 <- -1 #gcv
} else {G$sig2 <- scale}
if (fam.name=="quasi"||fam.name=="quasipoisson"||fam.name=="quasibinomial") {
## REML/ML invalid with quasi families
if (method=="REML") method <- "P-REML"
if (method=="ML") method <- "P-ML"
if (method=="P-ML"||method=="P-REML") warning("RE/ML is not recommended for quasi families")
}
## Following removed since extended quasi-likelihood coded up...
# if (fam.name=="quasi"||fam.name=="quasipoisson"||fam.name=="quasibinomial") {
# ## REML/ML invalid with quasi families
# if (method=="REML") method <- "P-REML"
# if (method=="ML") method <- "P-ML"
# if (method=="P-ML"||method=="P-REML") warning("RE/ML is not recommended for quasi families")
# }
if (reml) { ## then RE(ML) selection, but which variant?
criterion <- method
......@@ -1488,14 +1529,15 @@ variable.summary <- function(pf,dl,n) {
}
gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL,
method="GCV.Cp",optimizer=c("outer","newton"),control=gam.control(),
method="GCV.Cp",optimizer=c("outer","newton"),control=list(...),#gam.control(),
scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE,
paraPen=NULL,G=NULL,in.out=NULL,...)
# Routine to fit a GAM to some data. The model is stated in the formula, which is then
# interpreted to figure out which bits relate to smooth terms and which to parametric terms.
{ if (is.null(G))
{ control <- do.call("gam.control",control)
if (is.null(G))
{ # create model frame.....
gp<-interpret.gam(formula) # interpret the formula
cl<-match.call() # call needed in gam object for update to work
......@@ -1587,6 +1629,7 @@ gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
object$contrasts <- G$contrasts
object$xlevels <- G$xlevels
object$offset <- G$offset
if (!is.null(G$Xcentre)) object$Xcentre <- G$Xcentre
if (control$keepData) object$data <- data
object$df.residual <- nrow(G$X) - sum(object$edf)
object$min.edf<-G$min.edf
......@@ -2162,26 +2205,26 @@ predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
# setup prediction arrays
n.smooth<-length(object$smooth)
if (type=="lpmatrix")
{ H<-matrix(0,np,nb)
{ H <- matrix(0,np,nb)
} else
if (type=="terms"||type=="iterms")
{ term.labels<-attr(object$pterms,"term.labels")
if (is.null(attr(object,"para.only"))) para.only <-FALSE else
para.only <- TRUE # if true then only return information on parametric part
n.pterms <- length(term.labels)
fit<-array(0,c(np,n.pterms+as.numeric(!para.only)*n.smooth))
if (se.fit) se<-fit
ColNames<-term.labels
fit <- array(0,c(np,n.pterms+as.numeric(!para.only)*n.smooth))
if (se.fit) se <- fit
ColNames <- term.labels
} else
{ fit<-array(0,np)
if (se.fit) se<-fit
{ fit <- array(0,np)
if (se.fit) se <- fit
}
stop<-0
Terms <- delete.response(object$pterms)
s.offset <- NULL # to accumulate any smooth term specific offset
any.soff <- FALSE # indicator of term specific offset existence
for (b in 1:n.blocks) # work through prediction blocks
if (n.blocks>0) for (b in 1:n.blocks) # work through prediction blocks
{ start<-stop+1
stop<-start+b.size[b]-1
if (n.blocks==1) data <- newdata else data<-newdata[start:stop,]
......@@ -2208,6 +2251,11 @@ predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
if (!is.null(Xfrag.off)) { Xoff[,k] <- Xfrag.off; any.soff <- TRUE }
if (type=="terms"||type=="iterms") ColNames[n.pterms+k]<-object$smooth[[k]]$label
}
if (!is.null(object$Xcentre)) { ## Apply any column centering
X <- sweep(X,2,object$Xcentre)
}
# have prediction matrix for this block, now do something with it
if (type=="lpmatrix") {
H[start:stop,]<-X
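type="lpmatrix" accumulates the blocks into H, the full linear predictor matrix, so that H %*% coef reproduces the link-scale predictions. A self-contained sketch:

library(mgcv)
set.seed(8)
dat <- gamSim(1, n = 200, dist = "normal")
b <- gam(y ~ s(x0) + s(x1), data = dat)
Xp <- predict(b, newdata = dat[1:5, ], type = "lpmatrix")
drop(Xp %*% coef(b))   # matches predict(b, dat[1:5, ]) on the link scale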
......@@ -2300,7 +2348,7 @@ predict.gam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
}
H<-list(fit=fit,se.fit=se)
} else {
H<-fit
H <- fit
if (is.null(nrow(H))) names(H) <- rn else
rownames(H)<-rn
H <- napredict(na.act,H)
......@@ -2569,7 +2617,11 @@ summary.gam <- function (object, dispersion = NULL, freq = FALSE,alpha=0, ...)
if (m>0) # form test statistics for each smooth
{ if (!freq) {
if (nrow(object$model)>3000) { ## subsample to get X for p-values calc.
seed <- get(".Random.seed",envir=.GlobalEnv) ## store RNG seed
seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
if (inherits(seed,"try-error")) {
runif(1)
seed <- get(".Random.seed",envir=.GlobalEnv)
}
kind <- RNGkind(NULL)
RNGkind("default","default")
set.seed(11) ## ensure repeatability
......@@ -3245,7 +3297,7 @@ print.mgcv.version <- function()
set.mgcv.options <- function()
## function used to set the option value used in notLog
## and notExp...
{ runif(1) ## ensure there is a seed
{ ##runif(1) ## ensure there is a seed (can be removed by user!)
options(mgcv.vc.logrange=25)
}
......
......@@ -1454,11 +1454,11 @@ vis.gam <- function(x,view=NULL,cond=list(),n.grid=30,too.far=0,col=NA,color="he
## turn newd into a model frame, so that names and averages are valid
#attributes(newd)<-attributes(x$model)
#attr(newd,"row.names")<-as.character(1:(n.grid*n.grid))
fv<-predict.gam(x,newdata=newd,se=TRUE,type=type)
z<-fv$fit # store NA free copy now
fv <- predict.gam(x,newdata=newd,se.fit=TRUE,type=type)
z <- fv$fit # store NA free copy now
if (too.far>0) # exclude predictions too far from data
{ ex.tf<-exclude.too.far(v1,v2,x$model[,view[1]],x$model[,view[2]],dist=too.far)
fv$se.fit[ex.tf]<-fv$fit[ex.tf]<-NA
{ ex.tf <- exclude.too.far(v1,v2,x$model[,view[1]],x$model[,view[2]],dist=too.far)
fv$se.fit[ex.tf] <- fv$fit[ex.tf]<-NA
}
# produce a continuous scale in place of any factors
if (is.factor(m1))
......
......@@ -2,6 +2,29 @@
*** denotes really big changes
ISSUES:
1.7-3
** "ds" (Duchon splines) smooth class added. See ?Duchon.spline
** "sos" (spline on the sphere) smooth class added. See ?Spherical.Spline.
* Extended quasi-likelihood used with RE/ML smoothness selection and
quasi families.
* random subsampling code in bam, sos and tp smooths modified a little, so
that .Random.seed is set if it doesn't exist.
* `control' argument changed for gam/bam/gamm to a simple list, which is
then passed to gam.control (or lmeControl), to match `glm'.
* Efficiency of Lanczos iteration code improved, by restructuring, and
calling LAPACK for the eigen decomposition of the working tri-diagonal
matrix.
* Slight modification to `t2' marginal reparameterization, so that `main
effects' can be extracted more easily, if required.
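Illustrative use of the new smooth classes listed above (the "ds" call follows the pattern of ?Duchon.spline; the "sos" line is a sketch assuming latitude/longitude covariates in a hypothetical data frame somedat):

library(mgcv)
set.seed(4)
eg <- gamSim(2, n = 200, scale = .05)   # bivariate test function
b1 <- gam(y ~ s(x, z, bs = "ds", m = c(1, .5)), data = eg$data) # Duchon spline
## b2 <- gam(y ~ s(lat, lon, bs = "sos"), data = somedat)       # sphere smooth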
1.7-2
* `polys.plot' now exported, to facilitate plotting of results for
......
citHeader("2004 for additive model method and basics of gamm; 2008 for
generalized additive model method; 2006 for overview; 2003 for thin plate
regression splines; 2000 is the original method, but no longer the default.")
citHeader("2011 for generalized additive model method;
2004 for strictly additive GCV based model method and basics of gamm;
2006 for overview; 2003 for thin plate regression splines;
2000 is the original method, but no longer the default.")
citEntry(
entry="Article",
title="Fast stable restricted maximum likelihood and marginal
likelihood estimation of semiparametric generalized linear models",
journal="Journal of the Royal Statistical Society (B)",
volume= "73",
number="1",
pages="3-36",
year="2011",
author="S. N. Wood",
textVersion="Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36" )
citEntry(
entry="Article",
......@@ -18,21 +32,6 @@ citEntry(
American Statistical Association. 99:673-686."
)
citEntry(
entry="Article",
title="Fast stable direct fitting and smoothness selection for generalized
additive models",
journal="Journal of the Royal Statistical Society (B)",
volume= "70",
number="3",
pages="495-518",
year="2008",
author="S. N. Wood",
textVersion="Wood, S.N. (2008) Fast stable direct fitting and smoothness
selection for generalized additive models. Journal of the Royal
Statistical Society (B) 70(3):495-518 " )
citEntry(
entry="Book",
title="Generalized Additive Models: An Introduction with R",
......
......@@ -15,7 +15,7 @@ for large datasets.
}
\usage{
bam(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,
na.action=na.omit, offset=NULL,method="REML",control=gam.control(),
na.action=na.omit, offset=NULL,method="REML",control=list(...),
scale=0,gamma=1,knots=NULL,sp=NULL,min.sp=NULL,paraPen=NULL,
chunk.size=10000,rho=0,...)
}
......@@ -62,8 +62,8 @@ Mallows' Cp/UBRE/AIC for known scale. \code{"GACV.Cp"} is equivalent, but using
for REML estimation, including of unknown scale, \code{"P-REML"} for REML estimation, but using a Pearson estimate
of the scale. \code{"ML"} and \code{"P-ML"} are similar, but using maximum likelihood in place of REML. }
\item{control}{A list of fit control parameters returned by
\code{\link{gam.control}}.}
\item{control}{A list of fit control parameters to replace defaults returned by
\code{\link{gam.control}}. Any control parameters not supplied stay at their default values.}
\item{scale}{ If this is positive then it is taken as the known scale parameter. Negative signals that the
......
......@@ -40,7 +40,7 @@ For very large datasets see \code{\link{bam}}, for mixed GAM see \code{\link{gam
gam(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,
na.action,offset=NULL,method="GCV.Cp",
optimizer=c("outer","newton"),control=gam.control(),scale=0,
optimizer=c("outer","newton"),control=list(...),scale=0,
select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,
fit=TRUE,paraPen=NULL,G=NULL,in.out,...)
}
......@@ -56,7 +56,9 @@ to the right hand side to specify that the linear predictor depends on smooth fu
\item{family}{
This is a family object specifying the distribution and link to use in
fitting etc. See \code{\link{glm}} and \code{\link{family}} for more
details. A negative binomial family is provided: see \code{\link{negbin}}.
details. A negative binomial family is provided: see \code{\link{negbin}}.
\code{quasi} families actually result in the use of extended quasi-likelihood
if \code{method} is set to a RE/ML method (McCullagh and Nelder, 1989, 9.6).
}
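An illustrative call exercising this: with a quasi family and a RE/ML method, smoothness selection proceeds via extended quasi-likelihood (data simulated here purely for the sketch):

library(mgcv)
set.seed(5)
dat <- gamSim(1, n = 400, dist = "poisson", scale = .25)
b <- gam(y ~ s(x0) + s(x2), data = dat,
         family = quasipoisson, method = "REML")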
\item{data}{ A data frame or list containing the model response variable and
......@@ -79,8 +81,8 @@ that this offset will always be completely ignored when predicting, unlike an of
included in \code{formula}: this conforms to the behaviour of
\code{lm} and \code{glm}.}
\item{control}{A list of fit control parameters returned by
\code{\link{gam.control}}.}
\item{control}{A list of fit control parameters to replace defaults returned by
\code{\link{gam.control}}. Values not set assume default values. }
\item{method}{The smoothing parameter estimation method. \code{"GCV.Cp"} to use GCV for unknown scale parameter and
Mallows' Cp/UBRE/AIC for known scale. \code{"GACV.Cp"} is equivalent, but using GACV in place of GCV. \code{"REML"}
......@@ -251,8 +253,8 @@ components to be penalized via argument \code{paraPen} and allows the linear pre
general linear functionals of smooths, via the summation convention mechanism described in
\code{\link{linear.functional.terms}}.
Details of the default underlying fitting methods are given in Wood (2004
and 2008). Some alternative methods are discussed in Wood (2000 and 2006).
Details of the default underlying fitting methods are given in Wood (2011
and 2004). Some alternative methods are discussed in Wood (2000 and 2006).
}
......@@ -261,12 +263,13 @@ and 2008). Some alternative methods are discussed in Wood (2000 and 2006).
Key References on this implementation:
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass. 99:673-686. [Default
method for additive case (but no longer for generalized)]
Wood, S.N. (2008) Fast stable direct fitting and smoothness selection for generalized
additive models. J.R.Statist.Soc.B 70(3):495-518. [Generalized additive model methods]
method for additive case by GCV (but no longer for generalized)]
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
......@@ -301,6 +304,8 @@ the Newton method. SIAM J. Sci. Statist. Comput. 12:383-398
Gu (2002) Smoothing Spline ANOVA Models, Springer.
McCullagh and Nelder (1989) Generalized Linear Models 2nd ed. Chapman & Hall.
O'Sullivan, Yandall and Raynor (1986) Automatic smoothing of regression
functions in generalized linear models.
J. Am. Statist.Ass. 81:96-103
......
......@@ -108,12 +108,13 @@ default value 1e7.
\references{
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36
Wood, S.N. (2004) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. J. Amer. Statist. Ass.99:673-686.
Wood, S.N. (2008) Fast stable direct fitting and smoothness selection for generalized
additive models. J.R.Statist.Soc.B 70(3):495-518.
\url{http://www.maths.bath.ac.uk/~sw283/}
......
......@@ -118,14 +118,13 @@ of the families as described in \code{\link{fix.family.link}}.
\references{
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36
O'Sullivan, Yandall & Raynor (1986) Automatic smoothing of regression
functions in generalized linear models. J. Amer. Statist. Assoc. 81:96-103.
Wood, S.N. (2008) Fast stable direct fitting and smoothness selection for
generalized additive models. J.R.Statist. Soc. B 70(3):495-518
\url{http://www.maths.bath.ac.uk/~sw283/}
}
......
......@@ -126,8 +126,8 @@ where \eqn{f}{f} is a smooth function, and \eqn{z_i}{z_i} is a numeric variable.
The appropriate formula is:\cr
\code{y ~ s(x,by=z)}\cr
- the \code{by} argument ensures that the smooth function gets multiplied by
covariate \code{z}. Note that when using factor variables, then the presence of centering constraints usually
means that the by variable should be included as a parametric term, as well.
covariate \code{z}. Note that when using factor by variables, centering constraints are applied to the smooths,
which usually means that the by variable should be included as a parametric term, as well.
The example code below also illustrates the use of factor \code{by} variables.
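A minimal runnable illustration of the factor by pattern just described (gamSim(4) ships with mgcv and provides a suitable factor example):

library(mgcv)
set.seed(7)
dat <- gamSim(4)                            # data with factor `fac' and x2
b <- gam(y ~ fac + s(x2, by = fac), data = dat) # factor added parametrically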
......@@ -136,7 +136,7 @@ The example code below also illustrates the use of factor \code{by} variables.
If a \code{by} variable is present and numeric (rather than a factor) then the corresponding smooth is only subjected
to an identifiability constraint if (i) the \code{by} variable is a constant vector, or, (ii) for a matrix
\code{by} variable, \code{L}, if \code{L\%*\%rep(1,ncol(L))} is constant or (iii) if a user defined smooth constructor
supplies an identifiability constraint explicitly.
supplies an identifiability constraint explicitly, and that constraint has an attribute \code{"always.apply"}.
}
\section{Linking smooths with `id'}{
......@@ -188,7 +188,8 @@ are provided in
}
\section{Random effects}{
Random effects can be added to \code{gam} models using \code{s(...,bs="re")} terms (see \code{\link{smooth.construct.re.smooth.spec}}),
Random effects can be added to \code{gam} models using \code{s(...,bs="re")} terms (see
\code{\link{smooth.construct.re.smooth.spec}}),
or the \code{paraPen} argument to \code{\link{gam}} covered below. See \code{\link{gam.vcomp}}, \code{\link{random.effects}} and
\code{\link{smooth.construct.re.smooth.spec}} for further details. An alternative is to use the approach of \code{\link{gamm}}.
......@@ -295,7 +296,8 @@ sig.b <- sqrt(rm$sig2/rm$sp[1]);sig.b
rm1 <- gam(y ~ s(fac,bs="re")+s(x0)+s(x1)+s(x2)+s(x3),data=dat,method="ML")
gam.vcomp(rm1)
## Simple comparison with lme, using Rail data. See ?random.effects for a simpler method
## Simple comparison with lme, using Rail data.
## See ?random.effects for a simpler method
require(nlme)
b0 <- lme(travel~1,data=Rail,~1|Rail,method="ML")
Z <- model.matrix(~Rail-1,data=Rail,
......
......@@ -55,10 +55,9 @@ See Wood (2008) for full details on `outer iteration'.
}
\references{
Wood, S.N. (2008) Fast stable direct fitting and smoothness selection for generalized
additive models. J.R.Statist.Soc.B 70(3):495-518
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36
\url{http://www.maths.bath.ac.uk/~sw283/}
......
......@@ -140,6 +140,12 @@ Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
Wood, S.N. (2008) Fast stable direct fitting and smoothness selection for
generalized additive models. J.R.Statist. Soc. B 70(3):495-518
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36
\url{http://www.maths.bath.ac.uk/~sw283/}
}
......
......@@ -49,6 +49,12 @@ computed, and reports the numerical rank of the covariance matrix.
Wood, S.N. (2008) Fast stable direct fitting and smoothness
selection for generalized additive models. Journal of the Royal
Statistical Society (B) 70(3):495-518
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36
}
......
......@@ -37,6 +37,10 @@ was first proposed in O'Sullivan et al. (1986).
\references{
Wood, S.N. (2011) Fast stable restricted maximum likelihood
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36