Commit d21e4993 authored by Dirk Eddelbuettel

Import Upstream version 1.7-11

parent 21af8f1d
Package: mgcv
Version: 1.7-10
Version: 1.7-11
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
......@@ -13,6 +13,6 @@ Imports: graphics, stats, nlme, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix
LazyLoad: yes
License: GPL (>= 2)
Packaged: 2011-10-30 23:38:12 UTC; simon
Packaged: 2011-11-14 07:17:20 UTC; simon
Repository: CRAN
Date/Publication: 2011-11-02 17:25:55
Date/Publication: 2011-11-14 09:58:12
d3a130a90087192ce965333aa88fe2ea *DESCRIPTION
dc584e91cdb041efbad55880e8e336f1 *DESCRIPTION
abc5e760b0f5dd22c81d7614c0b304f0 *NAMESPACE
4553efb64e4def029f7c5bdfc0168936 *R/bam.r
257095aabda92b283b68e1b4a55a7976 *R/gam.fit3.r
166f5e589a85cd72305f281e0bc08626 *R/gam.fit3.r
902657a0ee2dedc3fdfa501bf3b37c5b *R/gam.sim.r
085b8b34e68292c173a22e55aca86562 *R/gamm.r
f20157d570c32d97592a2b8f697fc102 *R/mgcv.r
ee4953289666b34c44a2b63816233330 *R/plots.r
5344c3e25715ebeca3996f2d6baba454 *R/smooth.r
2abf5b43692bc0df458a2ddc1b0c6dca *R/gamm.r
0e48e7ed141719f4d9e6476e54efaa79 *R/mgcv.r
bf70158e37e33ea1136efdeab97569f8 *R/plots.r
ceed2e84bd4a17f7da324134ebe1b6b4 *R/smooth.r
fe9745f610246ee1f31eb915ca0d76a9 *R/sparse.r
e9e783dffd81ad4cacc60c521857e11e *changeLog
b3f865a351f7e840e79dfaaaa81d5378 *changeLog
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
40874e3ced720a596750f499ded8a60a *data/columb.rda
88d77139cc983317b6acd8c5f1252ab9 *gnugpl2.txt
......@@ -17,7 +17,7 @@ f693920e12f8a6f2b6cab93648628150 *index
5bf12ddc0dab9daae72271b96a15c539 *man/Predict.matrix.Rd
c51c9b8c9c73f81895176ded39b91394 *man/Predict.matrix.cr.smooth.Rd
612ab6354541ebe38a242634d73b66ba *man/Tweedie.Rd
bd6e48ef8323677776e6e6e213482bc0 *man/anova.gam.Rd
4891a913dbfaa2dc8f41fef6c8590f21 *man/anova.gam.Rd
c06df4877abdaf07e67fc32c85fd1a87 *man/bam.Rd
b385d6d5419d0d6aefe03af1a79d5c4e *man/bam.update.Rd
4e925cb579f4693d1b8ec2d5092c0b37 *man/cSplineDes.Rd
......@@ -101,7 +101,7 @@ a84c09609eedb6902b3ef7c8885010dd *man/smooth.construct.tp.smooth.spec.Rd
5ae47a140393009e3dba7557af175170 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
a17981f0fa2a6a50e637c98c672bfc45 *man/step.gam.Rd
75a0ca35d038c3738ab26705cd6a6bfe *man/summary.gam.Rd
b5fc9c991f1955b2ca25ed730aee4fd3 *man/summary.gam.Rd
22b571cbc0bd1e31f195ad927434c27e *man/t2.Rd
04076444b2c99e9287c080298f9dc1d7 *man/te.Rd
c3c23641875a293593fe4ef032b44aae *man/tensor.prod.model.matrix.Rd
......
......@@ -2033,6 +2033,9 @@ negbin <- function (theta = stop("'theta' must be specified"), link = "log") {
## single `theta' to specify fixed value; 2 theta values (first smaller than second)
## are limits within which to search for theta; otherwise supplied values make up
## search set.
## Note: to avoid warnings, get(".Theta")[1] is used below. Otherwise the initialization
## call to negbin can generate warnings since get(".Theta") returns a vector
## during initialization (only).
linktemp <- substitute(link)
if (!is.character(linktemp)) linktemp <- deparse(linktemp)
if (linktemp %in% c("log", "identity", "sqrt")) stats <- make.link(linktemp)
......@@ -2049,28 +2052,28 @@ negbin <- function (theta = stop("'theta' must be specified"), link = "log") {
}
env <- new.env(parent = .GlobalEnv)
assign(".Theta", theta, envir = env)
variance <- function(mu) mu + mu^2/get(".Theta")
variance <- function(mu) mu + mu^2/get(".Theta")[1]
## dvariance/dmu needed as well
dvar <- function(mu) 1 + 2*mu/get(".Theta")
dvar <- function(mu) 1 + 2*mu/get(".Theta")[1]
## d2variance/dmu...
d2var <- function(mu) rep(2/get(".Theta"),length(mu))
d2var <- function(mu) rep(2/get(".Theta")[1],length(mu))
d3var <- function(mu) rep(0,length(mu))
getTheta <- function() get(".Theta")
validmu <- function(mu) all(mu > 0)
dev.resids <- function(y, mu, wt) { Theta <- get(".Theta")
dev.resids <- function(y, mu, wt) { Theta <- get(".Theta")[1]
2 * wt * (y * log(pmax(1, y)/mu) -
(y + Theta) * log((y + Theta)/(mu + Theta)))
}
aic <- function(y, n, mu, wt, dev) {
Theta <- get(".Theta")
Theta <- get(".Theta")[1]
term <- (y + Theta) * log(mu + Theta) - y * log(mu) +
lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) -
lgamma(Theta + y)
2 * sum(term * wt)
}
ls <- function(y,w,n,scale) {
Theta <- get(".Theta")
Theta <- get(".Theta")[1]
ylogy <- y;ind <- y>0;ylogy[ind] <- y[ind]*log(y[ind])
term <- (y + Theta) * log(y + Theta) - ylogy +
lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) -
......@@ -2084,12 +2087,12 @@ negbin <- function (theta = stop("'theta' must be specified"), link = "log") {
})
rd <- function(mu,wt,scale) {
Theta <- get(".Theta")
Theta <- get(".Theta")[1]
rnbinom(mu,size=Theta,mu=mu)
}
qf <- function(p,mu,wt,scale) {
Theta <- get(".Theta")
Theta <- get(".Theta")[1]
qnbinom(p,size=Theta,mu=mu)
}
......
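The [1] indexing above matters because `theta' may be supplied as a length-2 search range, in which case .Theta holds both values until the search settles on a single one. A minimal sketch of the effect, with an explicit environment standing in for the family's own (names and values here are illustrative, not mgcv internals):

env <- new.env()
assign(".Theta", c(1, 10), envir = env)   ## a 2-value search range, as during initialization
variance <- function(mu) mu + mu^2/get(".Theta", envir = env)[1]
variance(c(2, 3, 4))   ## uses only the first (smaller) theta; no recycling warning
## without the [1], mu^2/c(1, 10) would recycle both theta values and warn
## whenever length(mu) is not a multiple of 2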
......@@ -1322,7 +1322,6 @@ gammPQL <- function (fixed, random, family, data, correlation, weights,
gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=list(),weights=NULL,
subset=NULL,na.action,knots=NULL,control=list(niterEM=0,optimMethod="L-BFGS-B"),
niterPQL=20,verbosePQL=TRUE,method="ML",...)
......@@ -1494,7 +1493,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
## Grouped random coefs are returned in matrices. Each row relates to one
## level of the grouping factor. So to get coefs in order, with all coefs
## for each grouping factor level contoguous, requires the following...
## for each grouping factor level contiguous, requires the following...
br <- as.numeric(unlist(lapply(ret$lme$coefficients$random,t)))
......@@ -1553,7 +1552,6 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
V <- extract.lme.cov2(ret$lme,mf,n.sr+1) # the data covariance matrix, excluding smooths
## from here not dealing with nested smooths yet....
## obtain XVX and S....
first.para <- last.para <- rep(0,G$m) ## collect first and last para info relevant to expanded Xf
......
......@@ -2500,6 +2500,8 @@ pinvXVX <- function(X,V,rank=NULL) {
## Routine for forming fractionally truncated
## pseudoinverse of XVX'. Returns as D where
## DD' gives the pseudoinverse itself.
## truncates to numerical rank, if this is
## less than supplied rank+1.
k <- max(0,floor(rank))
nu <- abs(rank - k)
# if (k < 1) { k <- 1; nu <- 0}
......@@ -2511,11 +2513,16 @@ pinvXVX <- function(X,V,rank=NULL) {
V <- (V + t(V))/2
ed <- eigen(V,symmetric=TRUE)
## check that actual rank is not below supplied rank+1
r.est <- sum(ed$values > max(ed$values)*.Machine$double.eps^.9)
if (r.est<k1) {k1 <- k <- r.est;nu <- 0;rank <- r.est}
## Get the eigenvectors...
vec <- qr.qy(qrx,rbind(ed$vectors,matrix(0,nrow(X)-ncol(X),ncol(X))))
if (k1<ncol(vec)) vec <- vec[,1:k1,drop=FALSE]
if (k==0) {
vec <- t(t(vec)*sqrt(nu/ed$val[1]))
attr(vec,"rank") <- rank
return(vec)
}
......@@ -2534,13 +2541,14 @@ pinvXVX <- function(X,V,rank=NULL) {
} else {
vec <- t(t(vec)/sqrt(ed$val[1:k]))
}
attr(vec,"rank") <- rank ## actual rank
vec ## vec%*%t(vec) is the pseudoinverse
}
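Roughly, the idea documented above is that for a target rank r = k + nu (k integer, 0 <= nu < 1) the pseudoinverse keeps the leading k eigen-directions at full weight and the next direction at a reduced weight, and now also truncates to the numerical rank when that is smaller than k + 1. The sketch below applies the idea directly to a symmetric matrix V; the real routine works on X and V jointly via a QR decomposition and uses a more careful interpolation for the fractional part (chosen so the test statistic has the correct mean and variance), so treat this as an illustration under simplifying assumptions, not the package's exact construction.

frac.pinv.sqrt <- function(V, rank) {
  V  <- (V + t(V))/2                      ## symmetrize, as in pinvXVX
  ed <- eigen(V, symmetric = TRUE)
  k  <- max(0, floor(rank)); nu <- abs(rank - k)
  ## truncate to numerical rank if this is below the supplied rank + 1...
  r.est <- sum(ed$values > max(ed$values)*.Machine$double.eps^.9)
  if (r.est < k + 1) { k <- r.est; nu <- 0; rank <- r.est }
  w <- c(1/ed$values[seq_len(k)],                                   ## full weight on leading k directions
         if (nu > 0 && k < length(ed$values)) nu/ed$values[k + 1])  ## reduced weight on the next one
  D <- ed$vectors[, seq_along(w), drop = FALSE] %*% diag(sqrt(w), length(w))
  attr(D, "rank") <- rank
  D   ## D %*% t(D) approximates the fractionally truncated pseudoinverse of V
}
## e.g. for a term with EDF 2.4: D <- frac.pinv.sqrt(Vf, 2.4); T <- sum((t(D) %*% f)^2)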
summary.gam <- function (object, dispersion = NULL, freq = FALSE,alpha=0, ...)
summary.gam <- function (object, dispersion = NULL, freq = FALSE, ...)
# summary method for gam object - provides approximate p values for terms + other diagnostics
# Improved by Henric Nilsson
{ pinv<-function(V,M,rank.tol=1e-6)
......@@ -2627,7 +2635,8 @@ summary.gam <- function (object, dispersion = NULL, freq = FALSE,alpha=0, ...)
df <- edf1 <- edf <- s.pv <- chi.sq <- array(0, m)
if (m>0) # form test statistics for each smooth
{ if (!freq) {
if (nrow(object$model)>3000) { ## subsample to get X for p-values calc.
sub.samp <- max(1000,2*length(object$coefficients))
if (nrow(object$model)>sub.samp) { ## subsample to get X for p-values calc.
seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
if (inherits(seed,"try-error")) {
runif(1)
......@@ -2636,14 +2645,15 @@ summary.gam <- function (object, dispersion = NULL, freq = FALSE,alpha=0, ...)
kind <- RNGkind(NULL)
RNGkind("default","default")
set.seed(11) ## ensure repeatability
ind <- sample(1:nrow(object$model),3000,replace=FALSE) ## sample these rows from X
ind <- sample(1:nrow(object$model),sub.samp,replace=FALSE) ## sample these rows from X
X <- predict(object,object$model[ind,],type="lpmatrix")
RNGkind(kind[1],kind[2])
assign(".Random.seed",seed,envir=.GlobalEnv) ## RNG behaves as if it had not been used
} else { ## don't need to subsample
X <- model.matrix(object)
X <- X[!is.na(rowSums(X)),] ## exclude NA's (possible under na.exclude)
}
X <- X[!is.na(rowSums(X)),] ## exclude NA's (possible under na.exclude)
##if (alpha>0) X <- diag(ncol(X))
## get corrected edf
## edf1 <- 2*object$edf - rowSums(object$Ve*(t(X)%*%X))/object$sig2
}
......@@ -2666,7 +2676,7 @@ summary.gam <- function (object, dispersion = NULL, freq = FALSE,alpha=0, ...)
df[i] <- min(ncol(Xt),edf1[i])
D <- pinvXVX(Xt,V,df[i])
df[i] <- df[i]+alpha*sum(object$smooth[[i]]$sp<0) ## i.e. alpha * (number free sp's)
df[i] <- attr(D,"rank") ## df[i] ##+alpha*sum(object$smooth[[i]]$sp<0) ## i.e. alpha * (number free sp's)
chi.sq[i] <- sum((t(D)%*%ft)^2)
}
......@@ -2736,7 +2746,7 @@ print.summary.gam <- function(x, digits = max(3, getOption("digits") - 3),
}
anova.gam <- function (object, ..., dispersion = NULL, test = NULL, alpha=0, freq=FALSE)
anova.gam <- function (object, ..., dispersion = NULL, test = NULL, freq=FALSE)
# improved by Henric Nilsson
{ # adapted from anova.glm: R stats package
dotargs <- list(...)
......@@ -2755,7 +2765,7 @@ anova.gam <- function (object, ..., dispersion = NULL, test = NULL, alpha=0, fre
test = test))
if (!is.null(test)) warning("test argument ignored")
if (!inherits(object,"gam")) stop("anova.gam called with non gam object")
sg <- summary(object, dispersion = dispersion, freq = freq,alpha = alpha)
sg <- summary(object, dispersion = dispersion, freq = freq)
class(sg) <- "anova.gam"
sg
}
......
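The subsampling block in summary.gam above follows a reusable pattern: draw a repeatable random subsample of rows while leaving the caller's random number generator exactly as it was. A self-contained sketch of that pattern (the helper name is illustrative; in the package the code is inlined in summary.gam):

subsample.rows <- function(n, size) {
  seed <- try(get(".Random.seed", envir = .GlobalEnv), silent = TRUE)  ## store RNG state
  if (inherits(seed, "try-error")) {  ## RNG not yet initialized...
    runif(1)                          ## ...so initialize it, then store the state
    seed <- get(".Random.seed", envir = .GlobalEnv)
  }
  kind <- RNGkind(NULL)               ## remember the current generator settings
  RNGkind("default", "default")
  set.seed(11)                        ## repeatable subsample
  ind <- sample(1:n, size, replace = FALSE)
  RNGkind(kind[1], kind[2])           ## restore the generator type...
  assign(".Random.seed", seed, envir = .GlobalEnv)  ## ...and its state
  ind
}
ind <- subsample.rows(10000, max(1000, 2*37))   ## e.g. np = 37 model coefficients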
......@@ -499,7 +499,7 @@ polys.plot <- function(pc,z=NULL,scheme="heat",lab="",...) {
(zlim[2]-zlim[1])*(pc[[i]][,2]-ylim[1])/(ylim[2]-ylim[1])
ylim <- zlim
plot(0,0,ylim=ylim,xlim=xlim,type="n",xaxt="n",bty="n",xlab="",ylab=lab)
plot(0,0,ylim=ylim,xlim=xlim,type="n",xaxt="n",bty="n",xlab="",ylab=lab,...)
for (i in 1:length(pc)) {
coli <- round((z[i] - zlim[1])/(zlim[2]-zlim[1])*100)
poly2(pc[[i]],col=scheme[coli])
......@@ -542,7 +542,7 @@ plot.mrf.smooth <- function(x,P=NULL,data=NULL,label="",se1.mult=1,se2.mult=2,
main=label))
} else { ## do plot
if (scheme==0) scheme <- "heat" else scheme <- "grey"
polys.plot(x$xt$polys,P$fit,scheme=scheme,lab=P$main)
polys.plot(x$xt$polys,P$fit,scheme=scheme,lab=P$main,...)
}
} ## end plot.mrf.smooth
......
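With `...' now passed through to polys.plot, extra graphical parameters reach the underlying plot() call when an MRF smooth is plotted. A sketch along the lines of the package's own columb example; asp = 1 is just one illustrative parameter, and which others are honoured depends on what plot() accepts:

library(mgcv)
data(columb)        ## Columbus crime data shipped with mgcv
data(columb.polys)  ## matching district polygons
b <- gam(crime ~ s(district, bs = "mrf", xt = list(polys = columb.polys)),
         data = columb, method = "REML")
plot(b, scheme = 0, asp = 1)   ## asp is forwarded via '...' to the map plot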
......@@ -2812,7 +2812,8 @@ smoothCon <- function(object,data,knots,absorb.cons=FALSE,scale.penalty=TRUE,n=n
if (!is.null(offs)) attr(sml[[1]]$X,"offset") <- offs*as.numeric(by)
}
sml[[1]]$label <- paste(sm$label,":",object$by,sep="")
if (object$by == "NA") sml[[1]]$label <- sm$label else
sml[[1]]$label <- paste(sm$label,":",object$by,sep="")
## test for cases where no centring constraint on the smooth is needed.
if (!alwaysCon) {
......
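The effect of the label fix above, in stripped-down form: when no by variable is supplied, object$by holds the string "NA", and the term label now stays as the plain smooth label instead of gaining a spurious ":NA" suffix (values here are illustrative):

lab <- "s(X)"   ## sm$label for a matrix-argument smooth
by  <- "NA"     ## object$by when no by variable was given
label <- if (by == "NA") lab else paste(lab, ":", by, sep = "")
label           ## "s(X)", not "s(X):NA"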
** denotes quite substantial/important changes
*** denotes really big changes
1.7-11
* smoothCon bug fix to avoid NA labels for matrix arguments when
no `by' variable is provided.
* modification to p-value computation in summary.gam: `alpha' argument
removed (it was set to zero anyway); the computation now deals with possible
rank deficiency when computing the pseudo-inverse of the covariance matrix
for the test statistic. Previously the p-value computation could fail for
random effect smooths with large datasets, when a random effect has many
levels. Also, for large data sets the test statistic is now based on randomly
sampling max(1000,np*2) model matrix rows, where np is the number of model
coefficients (the random number generator state is left unchanged by this);
the previous sample size was 3000.
* plot.mrf.smooth modified to allow passing '...' argument.
* 'negbin' modified to avoid spurious warnings on initialization call.
1.7-10
* fix stupid bug in 1.7-9 that lost term labels in plot.gam.
......
......@@ -13,7 +13,7 @@ suggests that best p-value performance results from using ML estimated smoothing
}
\usage{
\method{anova}{gam}(object, ..., dispersion = NULL, test = NULL,
alpha = 0, freq = FALSE)
freq = FALSE)
\method{print}{anova.gam}(x, digits = max(3, getOption("digits") - 3),...)
}
%- maybe also `usage' for other objects documented here.
......@@ -23,8 +23,6 @@ suggests that best p-value performance results from using ML estimated smoothing
\item{dispersion}{ a value for the dispersion parameter: not normally used.}
\item{test}{what sort of test to perform for a multi-model call. One of
\code{"Chisq"}, \code{"F"} or \code{"Cp"}. }
\item{alpha}{adjustment to degrees of freedom per estimated smoothing parameter for a
term when called with a single model object. See \code{\link{summary.gam}} for details.}
\item{freq}{whether to use frequentist or Bayesian approximations for single smooth term
p-values. See \code{\link{summary.gam}} for details.}
\item{digits}{number of digits to use when printing output.}
......@@ -35,7 +33,8 @@ is assessed using Wald tests: see \code{\link{summary.gam}} for details of the
actual computations.
In the latter case \code{print.anova.gam} is used as the
printing method. Note that the p-values for smooth terms are approximate only:
simulation evidence suggests that they work best with REML or ML smoothness selection.
simulation evidence suggests that they work best with ML smoothness selection
(REML coming a close second).
}
......
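A minimal usage sketch for the single-model form of anova.gam after the change, using simulated data from gamSim (which ships with mgcv) and ML smoothness selection, as the documentation recommends:

library(mgcv)
set.seed(0)
dat <- gamSim(1, n = 200, dist = "normal", scale = 2)
b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "ML")
anova(b)   ## approximate Wald tests for each smooth term; no alpha argument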
......@@ -7,7 +7,7 @@
summaries from it. (See \code{\link{sink}} to divert output to a file.)
}
\usage{
\method{summary}{gam}(object, dispersion=NULL, freq=FALSE, alpha=0, ...)
\method{summary}{gam}(object, dispersion=NULL, freq=FALSE, ...)
\method{print}{summary.gam}(x,digits = max(3, getOption("digits") - 3),
signif.stars = getOption("show.signif.stars"),...)
......@@ -25,9 +25,6 @@ summaries from it. (See \code{\link{sink}} to divert output to a file.)
covariance matrix of the parameter estimators. If this is set to TRUE then
the frequentist covariance matrix of the parameters is used instead. See details. }
\item{alpha}{ adjustment to reference distribution per estimated smoothing parameter.
}
\item{digits}{controls number of digits printed in output.}
\item{signif.stars}{Should significance stars be printed alongside output.}
......@@ -71,8 +68,8 @@ covariate values and let \eqn{{\bf V}_f}{V_f} denote the corresponding Bayesian
EDF for the term. The statistic used is then
\deqn{T = {\bf f}^T {\bf V}_f^{r-}{\bf f}}{T = f'V*_f f}
(this can be calculated efficiently without forming the pseudoinverse explicitly). \eqn{T}{T} is compared to a
chi-squared distribution with degrees of freedom given by the EDF for the term + \code{alpha} * (number of
estimated smoothing parameters), or \eqn{T}{T} is used as a component in an F ratio statistic if the
chi-squared distribution with degrees of freedom given by the EDF for the term,
or \eqn{T}{T} is used as a component in an F ratio statistic if the
scale parameter has been estimated. The non-integer rank truncated inverse is constructed to give an
approximation varying smoothly between the bounding integer rank approximations, while yielding test statistics with the
correct mean and variance.
......@@ -84,7 +81,7 @@ for \eqn{\bf f}{f} implied by the truncation used in the test statistic.
Note that the distributional approximations for the p-values start to break down below one effective degree of freedom, and
p-values are not reported below 0.5 degrees of freedom.
In simualtions the p-values have best behaviour under ML smoothness selection, with REML coming second.
In simulations the p-values have best behaviour under ML smoothness selection, with REML coming second.
}
......
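The statistic described above can be written as T = ||D'f||^2, where D D' is the (rank-truncated) pseudoinverse of the covariance matrix of f. A small numerical sketch with an integer rank, ignoring the fractional-rank interpolation; all names and values here are stand-ins, not mgcv objects:

set.seed(1)
p  <- 5
Vf <- crossprod(matrix(rnorm(p*p), p))   ## stand-in for the Bayesian covariance of f
f  <- rnorm(p)                           ## stand-in for the smooth term's predictions
ed <- eigen((Vf + t(Vf))/2, symmetric = TRUE)
r  <- 3                                  ## rank at which to test
D  <- ed$vectors[, 1:r] %*% diag(1/sqrt(ed$values[1:r]))
T.stat <- sum((t(D) %*% f)^2)            ## equals t(f) %*% D %*% t(D) %*% f
T.stat                                   ## referred to a chi-squared with r degrees of freedom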