Commit 98c7e43b authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 1.3-18

parent ce7c996b
Package: mgcv
Version: 1.3-17
Version: 1.3-18
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV smoothness estimation and GAMMs by REML/PQL
......@@ -12,4 +12,4 @@ Imports: graphics, stats
Suggests: nlme (>= 3.1-64), MASS (>= 7.2-2)
LazyLoad: yes
License: GPL version 2 or later
Packaged: Wed May 17 11:35:14 2006; simon
Packaged: Thu Jul 20 14:08:34 2006; simon
......@@ -557,9 +557,20 @@ gamm.setup<-function(formula,pterms,data=stop("No data supplied to gam.setup"),k
G
}
varWeights.dfo <- function(b,data)
## get reciprocal *standard deviations* implied by the estimated variance
## structure of an lme object, b, in *original data frame order*.
## `b` is a fitted lme object; `data` is the data frame it was fitted to.
## Returns a numeric vector of weights, one per row of `data`.
{ w<-varWeights(b$modelStruct$varStruct)
  # w is not in data.frame order - it's in inner grouping level order
  group.name<-names(b$groups) # b$groups[[i]] doesn't always retain factor ordering
  ## Order rows by the grouping factors, outermost grouping first, i.e.
  ## order(data[[g1]], data[[g2]], ...).  The original built this call as a
  ## string and ran it through eval(parse(text=...)); do.call() constructs
  ## the identical call directly and safely.
  ind <- do.call(order, lapply(group.name, function(g) data[[g]]))
  w[ind] <- w # into data frame order
  w
}
extract.lme.cov2<-function(b,data,start.level=1)
# function to extract the response data covariance matrix from an lme fitted
......@@ -976,7 +987,8 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
### Actually do fitting ....
if (family$family=="gaussian"&&family$link=="identity"&&
length(offset.name)==0) lme.used <- TRUE else lme.used <- FALSE
if (lme.used&&!is.null(weights)&&!inherits(weights,"varFunc")) lme.used <- FALSE
if (lme.used)
{ ## following construction is a work-around for problem in nlme 3-1.52
eval(parse(text=paste("ret$lme<-lme(",deparse(fixed.formula),
......@@ -985,6 +997,8 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
##ret$lme<-lme(fixed.formula,random=rand,data=mf,correlation=correlation,control=control)
} else
{ ## Again, construction is a work around for nlme 3-1.52
if (inherits(weights,"varFunc"))
stop("weights must be like glm weights for generalized case")
if (verbosePQL) cat("\n Maximum number of PQL iterations: ",niterPQL,"\n")
eval(parse(text=paste("ret$lme<-glmmPQL(",deparse(fixed.formula),
",random=rand,data=strip.offset(mf),family=family,correlation=correlation,control=control,",
......@@ -1136,7 +1150,10 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
names(object$coefficients)<-term.names # note - won't work on matrices!!
if (is.null(weights))
object$prior.weights <- object$y*0+1
else if (inherits(weights,"varFunc"))
object$prior.weights <- varWeights.dfo(ret$lme,mf)^2
else object$prior.weights <- weights
object$weights<-object$prior.weights
ret$gam<-object
......
......@@ -410,14 +410,14 @@ s <- function (..., k=-1,fx=FALSE,bs="tp",m=0,by=NA)
{ vars<-as.list(substitute(list(...)))[-1] # gets terms to be smoothed without evaluation
# call<-match.call() # get function call
d<-length(vars) # dimension of smoother
term<-deparse(vars[[d]],backtick=TRUE) # last term in the ... arguments
by.var<-deparse(substitute(by),backtick=TRUE) #getting the name of the by variable
term<-deparse(vars[[d]],backtick=TRUE,width.cutoff=500) # last term in the ... arguments
by.var<-deparse(substitute(by),backtick=TRUE,width.cutoff=500) #getting the name of the by variable
if (by.var==".") stop("by=. not allowed")
term<-deparse(vars[[1]],backtick=TRUE) # first covariate
term<-deparse(vars[[1]],backtick=TRUE,width.cutoff=500) # first covariate
if (term[1]==".") stop("s(.) not yet supported.")
if (d>1) # then deal with further covariates
for (i in 2:d)
{ term[i]<-deparse(vars[[i]],backtick=TRUE)
{ term[i]<-deparse(vars[[i]],backtick=TRUE,width.cutoff=500)
if (term[i]==".") stop("s(.) not yet supported.")
}
for (i in 1:d) term[i] <- attr(terms(reformulate(term[i])),"term.labels")
......@@ -441,8 +441,9 @@ s <- function (..., k=-1,fx=FALSE,bs="tp",m=0,by=NA)
full.call<-paste("s(",term[1],sep="")
if (d>1) for (i in 2:d) full.call<-paste(full.call,",",term[i],sep="")
label<-paste(full.call,")",sep="") # used for labelling parameters
full.call<-paste(full.call,",k=",deparse(k,backtick=TRUE),",fx=",deparse(fx,backtick=TRUE),",bs=",
deparse(bs,backtick=TRUE),",m=",deparse(m,backtick=TRUE),
full.call<-paste(full.call,",k=",deparse(k,backtick=TRUE,width.cutoff=500),",fx=",
deparse(fx,backtick=TRUE,width.cutoff=500),",bs=",
deparse(bs,backtick=TRUE,width.cutoff=500),",m=",deparse(m,backtick=TRUE,width.cutoff=500),
",by=",by.var,")",sep="")
ret<-list(term=term,bs.dim=k,fixed=fx,dim=d,p.order=m,by=by.var,full.call=full.call,label=label)
class(ret)<-paste(bs,".smooth.spec",sep="")
......@@ -465,7 +466,7 @@ smooth.construct.tensor.smooth.spec<-function(object,data,knots)
if (object$np) # reparameterize
for (i in 1:m)
{ if (object$margin[[i]]$dim==1) {
if (!inherits(object$margin[[i]],c("cs.smooth","cr.smooth"))) { # these classes already optimal
if (!inherits(object$margin[[i]],c("cs.smooth","cr.smooth","cyclic.smooth"))) { # these classes already optimal
x <- get.var(object$margin[[i]]$term,data)
np <- ncol(object$margin[[i]]$X) ## number of params
## note: to avoid extrapolating wiggliness measure
......
1.3-18
* gamm modified so that weights dealt with properly if lme type varFunc
used. This is only possible in the non-generalized case, as gamm.Rd
now clarifies.
* slight modification to s() to add `width.cutoff=500' to `deparse'
* by variables not handled properly in p-spline example in
smooth.construct.Rd - fixed.
* bug fix in summary.gam.Rd example (pmax -> pmin)
* color example added to plot.gam.Rd
* bug fix in `smooth.construct.tensor.smooth.spec' - class "cyclic.smooth"
marginals no longer re-parameterized.
* `te' documentation modified to mention that marginal reparameterization
can destabilize tensor products.
1.3-17
* print.summary.gam prints estimated ranks more prettily (thanks Martin
......
......@@ -66,8 +66,12 @@ The default \code{gaussian} with identity link causes \code{gamm} to fit by a di
covariates required by the formula. By default the variables are taken
from \code{environment(formula)}, typically the environment from
which \code{gamm} is called.}
\item{weights}{ prior weights on the data. See documentation for \code{lme}
for details of how to use this argument.}
\item{weights}{In the generalized case, weights with the same meaning as
\code{\link{glm}} weights. An \code{lme} type weights argument may only be
used in the identity link gaussian case, with no offset (see documentation for \code{lme}
for details of how to use such an argument).}
\item{subset}{ an optional vector specifying a subset of observations to be
used in the fitting process.}
\item{na.action}{ a function which indicates what should happen when the data
......
......@@ -167,23 +167,46 @@ The function can not deal with smooths of more than 2 variables!
\examples{
library(mgcv)
set.seed(0)
## fake some data...
f1 <- function(x) {exp(2 * x)}
f2 <- function(x) {
0.2*x^11*(10*(1-x))^6+10*(10*x)^3*(1-x)^10
}
f3 <- function(x) {x*0}
n<-200
sig2<-4
x0 <- rep(1:4,50)
x1 <- runif(n, 0, 1)
x2 <- runif(n, 0, 1)
x3 <- runif(n, 0, 1)
y <- 2 * x0
y <- y + exp(2 * x1) - 3.75887
y <- y+0.2*x2^11*(10*(1-x2))^6+10*(10*x2)^3*(1-x2)^10-1.396
e <- rnorm(n, 0, sqrt(abs(sig2)))
y <- y + e
e <- rnorm(n, 0, sqrt(sig2))
y <- 2*x0 + f1(x1) + f2(x2) + f3(x3) + e
x0 <- factor(x0)
## fit and plot...
b<-gam(y~x0+s(x1)+s(x2)+s(x3))
plot(b,pages=1,residuals=TRUE,all.terms=TRUE,shade=TRUE,shade.col=2)
# just parametric term alone
## just parametric term alone...
termplot(b,terms="x0",se=TRUE)
# example with 2-d plots
## more use of color...
op <- par(mfrow=c(2,2),bg="blue")
x <- 0:1000/1000
for (i in 1:3) {
plot(b,select=i,rug=FALSE,col="green",
col.axis="white",col.lab="white",all.terms=TRUE)
for (j in 1:2) axis(j,col="white",labels=FALSE)
box(col="white")
eval(parse(text=paste("fx <- f",i,"(x)",sep="")))
fx <- fx-mean(fx)
lines(x,fx,col=2) ## overlay `truth' in red
}
par(op)
## example with 2-d plots...
b1<-gam(y~x0+s(x1,x2)+s(x3))
op<-par(mfrow=c(2,2))
plot(b1,all.terms=TRUE)
......
......@@ -188,7 +188,14 @@ Predict.matrix.pspline.smooth<-function(object,data)
# prediction method function for the p.spline smooth class
{ require(splines)
x <- get.var(object$term,data)
spline.des(object$knots,x,object$m[1]+2,x*0)$design
X <- spline.des(object$knots,x,object$m[1]+2,x*0)$design
if (object$by != "NA") { ## handle any by variables
by <- get.var(object$by, data)
if (is.null(by))
stop("Can't find by variable")
X <- as.numeric(by) * X
}
X
}
# an example, using the new class....
......
......@@ -189,7 +189,7 @@ plot(b,pages=1)
summary(b)
# now check the p-values by using a pure regression spline.....
b.d<-round(b$edf)+1
b.d<-pmax(b.d,3) # can't have basis dimension less than this!
b.d<-pmin(b.d,3) # can't have basis dimension less than this!
bc<-gam(y~s(x0,k=b.d[1],fx=TRUE)+s(x1,k=b.d[2],fx=TRUE)+
s(x2,k=b.d[3],fx=TRUE)+s(x3,k=b.d[4],fx=TRUE))
plot(bc,pages=1)
......@@ -201,6 +201,7 @@ for (i in 1:k)
p[i]<-summary(b)$s.p[1]
}
plot(((1:k)-0.5)/k,sort(p))
abline(0,1,col=2)
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ...
......
......@@ -51,11 +51,11 @@ set \code{d=c(2,1)}.}
a single penalty: single penalties are not recommended - they tend to allow only rather
wiggly models.}
\item{np}{ \code{TRUE} to use the `natural parameterization' for a tensor
\item{np}{ \code{TRUE} to use the `normal parameterization' for a tensor
product smooth. This parameterization represents any 1-d marginal smooths
using a parameterization where the parameters are function values at `knots'
spread evenly through the data. The parameterization make the penalties
easily interpretable.}
spread evenly through the data. The parameterization makes the penalties
easily interpretable, however it can reduce numerical stability.}
}
\details{ Smooths of several covariates can be constructed from tensor products of the bases
......@@ -77,13 +77,16 @@ nested within GAMs constructed from higher rank tensor product smooths if the
same marginal bases are used in both cases (the marginal smooths themselves
are just special cases of tensor product smooths.)
The `natural parameterization' (\code{np=TRUE}) re-parameterizes the marginal
The `normal parameterization' (\code{np=TRUE}) re-parameterizes the marginal
smooths of a tensor product smooth so that the parameters are function values
at a set of points spread evenly through the range of values of the covariate
of the smooth. This means that the penalty of the tensor product associated
with any particular covariate direction can be interpreted as the penalty of
the appropriate marginal smooth applied in that direction and averaged over
the smooth. Currently this is only done for marginals of a single variable.
the smooth. Currently this is only done for marginals of a single
variable. This parameterization can reduce numerical stability when used
with marginal smooths other than \code{"cc"}, \code{"cr"} and \code{"cs"}: if
this causes problems, set \code{np=FALSE}.
The function does not evaluate the variable arguments.
......
......@@ -499,3 +499,7 @@ msgstr ""
msgid "X lost dimensions in magic!!"
msgstr ""
msgid "weights must be like glm weights for generalized case"
msgstr ""
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment