Commit fe0211b0 authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 1.6-1

parent 49710d4d
Package: mgcv
Version: 1.6-0
Version: 1.6-1
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
......@@ -12,6 +12,6 @@ Imports: graphics, stats, nlme, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix
LazyLoad: yes
License: GPL (>= 2)
Packaged: 2009-11-18 15:17:40 UTC; simon
Packaged: 2009-12-01 16:17:22 UTC; simon
Repository: CRAN
Date/Publication: 2009-11-18 17:00:51
Date/Publication: 2009-12-01 17:19:02
......@@ -1273,7 +1273,7 @@ estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,...) {
## correct null deviance if there's an offset [Why not correct calc in gam.fit/3???]....
if (G$intercept&&any(G$offset!=0)) object$null.deviance <-
glm(G$y~offset(G$offset),family=object$family)$deviance
glm(G$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance
object$method <- criterion
......@@ -1485,9 +1485,9 @@ gam.check <- function(b)
} else { ## just default print of information...
cat("\n");print(b$outer.info)
}
} else { ## perf iter or AM case
if (b$mgcv.conv$iter==0)
cat("\nModel required no smoothing parameter selection")
} else { ## no sp, perf iter or AM case
if (length(b$sp)==0) ## no sp's estimated
cat("\nModel required no smoothing parameter selection")
else {
cat("\nSmoothing parameter selection converged after",b$mgcv.conv$iter,"iteration")
if (b$mgcv.conv$iter>1) cat("s")
......
......@@ -57,13 +57,14 @@ cSplineDes <- function (x, knots, ord = 4)
if (ord<2) stop("order too low")
if (nk<ord) stop("too few knots")
knots <- sort(knots)
if (min(x)<knots[1]||max(x)>knots[nk]) stop("x out of range")
xc <- knots[nk-ord+1] ## wrapping invloved above this point
k1 <- knots[1]
if (min(x)<k1||max(x)>knots[nk]) stop("x out of range")
xc <- knots[nk-ord+1] ## wrapping involved above this point
## copy end intervals to start, for wrapping purposes...
knots <- c(knots[1]-(knots[nk]-knots[(nk-ord+1):(nk-1)]),knots)
knots <- c(k1-(knots[nk]-knots[(nk-ord+1):(nk-1)]),knots)
ind <- x>xc ## index for x values where wrapping is needed
X1 <- splineDesign(knots,x,ord,outer.ok=TRUE)
x[ind] <- x[ind]-max(knots)
x[ind] <- x[ind] - max(knots) + k1
if (sum(ind)) {
X2 <- splineDesign(knots,x[ind],ord,outer.ok=TRUE) ## wrapping part
X1[ind,] <- X1[ind,] + X2
......@@ -585,6 +586,7 @@ smooth.construct.cr.smooth.spec<-function(object,data,knots)
# It takes a cubic regression spline specification object and returns the
# corresponding basis object.
{ shrink <- attr(object,"shrink")
if (length(object$term)!=1) stop("Basis only handles 1D smooths")
x <- data[[object$term]]
nx<-length(x)
if (is.null(knots)) ok <- FALSE
......@@ -733,6 +735,7 @@ smooth.construct.cc.smooth.spec<-function(object,data,knots)
list(B=B,D=D)
} # end of getBD local function
# evaluate covariate, x, and knots, k.
if (length(object$term)!=1) stop("Basis only handles 1D smooths")
x <- data[[object$term]]
if (object$bs.dim < 0 ) object$bs.dim <- 10 ## default
if (object$bs.dim <4) { object$bs.dim <- 4
......@@ -813,6 +816,7 @@ smooth.construct.cp.smooth.spec<-function(object,data,knots)
if (object$bs.dim<0) object$bs.dim <- max(10,m[1]) ## default
nk <- object$bs.dim +1 ## number of interior knots
if (nk<=m[1]) stop("basis dimension too small for b-spline order")
if (length(object$term)!=1) stop("Basis only handles 1D smooths")
x <- data[[object$term]] # find the data
k <- knots[[object$term]]
......@@ -878,6 +882,7 @@ smooth.construct.ps.smooth.spec<-function(object,data,knots)
if (object$bs.dim<0) object$bs.dim <- max(10,m[1]+1) ## default
nk <- object$bs.dim - m[1] # number of interior knots
if (nk<=0) stop("basis dimension too small for b-spline order")
if (length(object$term)!=1) stop("Basis only handles 1D smooths")
x <- data[[object$term]] # find the data
k <- knots[[object$term]]
if (is.null(k)) { xl <- min(x);xu <- max(x) } else
......
** denotes quite substantial/important changes
*** denotes really big changes
1.6-1
* Bug in cSplineDes caused bases to be non-cyclic unless first
knot was at 0. This also affected the "cp" smoother class. Fixed.
* null.deviance calculation was wrong for case with offset and
weights. Fixed.
* Built in strictly 1D smoothers now give an informative error message if
an attempt is made to use them for multidimensional smoothing.
* gam.check generated a spurious error when applied to a model with no
estimated smoothing parameters. Fixed.
1.6-0
*** Routine `bam' added for fitting GAMs to very large datasets.
......
......@@ -129,7 +129,8 @@ Note that POI is not as stable as the default nested iteration used with \code{\
datasets, this is unlikely to matter much.
Note that it is possible to spend most of the computational time on basis evaluation, if an expensive basis is used. In practice
this means that the default \code{"tp"} basis should be avoided.
this means that the default \code{"tp"} basis should be avoided: almost any other basis (e.g. \code{"cr"} or \code{"ps"})
can be used in the 1D case, and tensor product smooths (\code{te}) are typically much less costly in the multi-dimensional case.
}
......@@ -146,7 +147,7 @@ this means that the default \code{"tp"} basis should be avoided.
\section{WARNINGS }{
The routine will be slow if the default \code{"tp"} basis is used.
The routine will be slow if the default \code{"tp"} basis is used.
You must have more unique combinations of covariates than the model has total
parameters. (Total parameters is sum of basis dimensions plus sum of non-spline
......
......@@ -31,9 +31,27 @@ last knot locations.}
\seealso{\code{\link{cyclic.p.spline}}}
\examples{
x <- 0:100/100;k<- 0:5/5
X <- cSplineDes(x,k)
## create some x's and knots...
n <- 200
x <- 0:(n-1)/(n-1);k<- 0:5/5
X <- cSplineDes(x,k) ## cyclic spline design matrix
## plot evaluated basis functions...
plot(x,X[,1],type="l"); for (i in 2:5) lines(x,X[,i],col=i)
## check that the ends match up....
ee <- X[1,]-X[n,];ee
tol <- .Machine$double.eps^.75
if (all.equal(ee,ee*0,tol=tol)!=TRUE)
stop("cyclic spline ends don't match!")
## similar with uneven data spacing...
x <- sort(runif(n)) + 1 ## sorting just makes end checking easy
k <- seq(min(x),max(x),length=8) ## create knots
X <- cSplineDes(x,k) ## get cyclic spline model matrix
plot(x,X[,1],type="l"); for (i in 2:ncol(X)) lines(x,X[,i],col=i)
ee <- X[1,]-X[n,];ee ## do ends match??
tol <- .Machine$double.eps^.75
if (all.equal(ee,ee*0,tol=tol)!=TRUE)
stop("cyclic spline ends don't match!")
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..
......
......@@ -8,7 +8,7 @@ penalized regression smoothers.
Penalized regression smoothers gain computational efficiency by virtue of
being defined using a basis of relatively modest size, \code{k}. When setting
up models in the \code{mgcv} package, using \code{\link{s}} or \code{\link{te}}
terms in a model formula, \code{k} must be chosen.
terms in a model formula, \code{k} must be chosen: the defualts are essentially arbitrary.
In practice \code{k-1} (or \code{k}) sets the upper limit on the degrees of freedom
associated with an \code{\link{s}} smooth (1 degree of freedom is usually lost to the identifiability
......@@ -16,7 +16,7 @@ constraint on the smooth). For \code{\link{te}} smooths the upper limit of the
degrees of freedom is given by the product of the \code{k} values provided for
each marginal smooth less one, for the constraint. However the actual
effective degrees of freedom are controlled by the degree of penalization
selected during fitting, by GCV, AIC or whatever is specified. The exception
selected during fitting, by GCV, AIC, REML or whatever is specified. The exception
to this is if a smooth is specified using the \code{fx=TRUE} option, in which
case it is unpenalized.
......@@ -36,7 +36,11 @@ an equivalent, single, smooth to the residuals, using a substantially
increased \code{k} to see if there is pattern in the residuals that could
potentially be explained by increasing \code{k}. Examples are provided below.
More sophisticated approaches based on partial residuals are also possible.
The obvious, but more costly, alternative is simply to increase the suspect \code{k}
and refit the original model. If there are no statistically important changes as a result of
doing this, then \code{k} was large enough. (Change in the smoothness selection criterion,
and/or the effective degrees of freedom, when \code{k} is increased, provide the obvious
numerical measures for whether the fit has changed substantially.)
One scenario that can cause confusion is this: a model is fitted with
\code{k=10} for a smooth term, and the EDF for the term is estimated as 7.6,
......@@ -66,10 +70,12 @@ Wood, S.N. (2006) Generalized Additive Models: An Introduction with R. CRC.
library(mgcv)
set.seed(0)
dat <- gamSim(1,n=400,scale=2)
## fit a GAM with quite low `k'
b<-gam(y~s(x0,k=6)+s(x1,k=6)+s(x2,k=6)+s(x3,k=6),data=dat)
plot(b,pages=1)
## Economical tactic (see below for more obvious approach)....
## check for residual pattern, removeable by increasing `k'
## typically `k', below, chould be substantially larger than
## the original, `k' but certainly less than n/2.
......@@ -106,6 +112,27 @@ gam(rsd~s(x2,k=40,bs="cs"),gamma=1.4,data=dat) ## `k' too low
gam(rsd~s(x3,k=40,bs="cs"),gamma=1.4,data=dat) ## fine
rm(dat)
## More obvious, but more expensive tactic... Just increase
## suspicious k until fit is stable.
set.seed(0)
dat <- gamSim(1,n=400,scale=2)
## fit a GAM with quite low `k'
b<-gam(y~s(x0,k=6)+s(x1,k=6)+s(x2,k=6)+s(x3,k=6),data=dat,method="REML")
b
## edf for 3rd smooth is highest as proportion of k -- increase k
b<-gam(y~s(x0,k=6)+s(x1,k=6)+s(x2,k=12)+s(x3,k=6),data=dat,method="REML")
b
## edf substantially up, -ve REML substantially down
b<-gam(y~s(x0,k=6)+s(x1,k=6)+s(x2,k=24)+s(x3,k=6),data=dat,method="REML")
b
## slight edf increase and -ve REML change
b<-gam(y~s(x0,k=6)+s(x1,k=6)+s(x2,k=40)+s(x3,k=6),data=dat,method="REML")
b
## defintely stabilized (but really k around 20 would have been fine here)
}
\keyword{models} \keyword{regression}%-- one or more ..
......
......@@ -634,3 +634,6 @@ msgstr ""
msgid "`by' variable must be same dimension as smooth arguments"
msgstr ""
msgid "Basis only handles 1D smooths"
msgstr ""
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment