Commit 9f161b8d authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 1.3-6

parent 1c4b223e
Package: mgcv
Version: 1.3-4
Version: 1.3-6
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV smoothness estimation and GAMMs by REML/PQL
......@@ -12,4 +12,4 @@ Imports: graphics, stats
Suggests: nlme (>= 3.1-52), MASS (>= 7.2-2)
LazyLoad: yes
License: GPL version 2 or later
Packaged: Tue Jul 12 15:24:47 2005; simon
Packaged: Tue Aug 16 13:43:11 2005; simon
......@@ -52,6 +52,8 @@ new.name Obtain a name for a new variable that is not
already in use
notExp Functions for better-than-log positive
parameterization
notExp2 Alternative to log parameterization for
variance components
null.space.dimension The basis of the space of un-penalized
functions for a t.p.r.s.
pcls Penalized Constrained Least Squares Fitting
......
useDynLib(mgcv)
export(anova.gam,coef.pdIdnot,coef.pdTens,corMatrix.pdIdnot,Dim.pdIdnot,
export(
anova.gam,coef.pdIdnot,coef.pdTens,
corMatrix.pdIdnot,Dim.pdIdnot,
exclude.too.far,extract.lme.cov, extract.lme.cov2,
formXtViX, full.score, formula.gam,fixDependence,fix.family.link,
fix.family.var, gam, gam2derivative, gam2objective,
......@@ -8,10 +10,12 @@ export(anova.gam,coef.pdIdnot,coef.pdTens,corMatrix.pdIdnot,Dim.pdIdnot,
gam.fit, gam.outer, gamm.setup , influence.gam, interpret.gam,
gam.setup,
gam.side,gam.method,
get.var,logDet.pdIdnot,initial.sp,logLik.gam,
get.var,
logDet.pdIdnot,
initial.sp,logLik.gam,
magic, magic.post.proc, mgcv, mgcv.control, mgcv.find.theta,
mgcv.get.scale, mono.con, mroot, new.name,
notExp,notLog,pcls,null.space.dimension,
notExp,notExp2,notLog,notLog2,pcls,null.space.dimension,
pdConstruct.pdIdnot,pdFactor.pdIdnot,pdMatrix.pdIdnot,pdIdnot,
pdConstruct.pdTens,pdFactor.pdTens,pdMatrix.pdTens,pdTens,
place.knots, plot.gam, print.anova.gam,
......@@ -31,7 +35,9 @@ export(anova.gam,coef.pdIdnot,coef.pdTens,corMatrix.pdIdnot,Dim.pdIdnot,
smooth.construct.tensor.smooth.spec,
smooth.construct.tp.smooth.spec,
smooth.construct.ts.smooth.spec,
summary.gam,solve.pdIdnot,summary.pdIdnot,summary.pdTens,te,
summary.gam,
solve.pdIdnot,
summary.pdIdnot,summary.pdTens,te,
tensor.prod.model.matrix,tensor.prod.penalties,
uniquecombs, vcov.gam, vis.gam)
......
......@@ -29,7 +29,29 @@ notLog <- function(x)
f
}
## notLog/notExp replacements.
## around 27/7/05 nlme was modified to use a new optimizer, which fails with
## indefinite Hessians. This is a problem if smoothing parameters are zero
## or infinite. The following attempts to make the notLog parameterization
## non-monotonic, to artificially reduce the likelihood at very large and very
## small parameter values.
## note gamm, pdTens, pdIdnot, notExp and notExp2 .Rd files all modified by
## this change.
notExp2 <- function (x,d=.Options$mgcv.vc.logrange,b=1/d)
## to avoid needing to modify solve.pdIdnot, this transformation must
## maintain the property that 1/notExp2(x) = notExp2(-x)
{ f <- exp(d*sin(x*b))
  f
}
notLog2 <- function(x,d=.Options$mgcv.vc.logrange,b=1/d)
{ x <- log(x)/d
x <- pmin(1,x)
x <- pmax(-1,x)
asin(x)/b
}
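## --- Hedged illustration, not part of the upstream source ---------------
## The two properties that the comments above rely on can be checked
## numerically once the definitions are available (e.g. after library(mgcv)).
## The range d=25 is just an example value, not necessarily the package default.
library(mgcv)
d <- 25; b <- 1/d
xx <- seq(-30, 30, length = 101)
## reciprocal property needed by solve.pdIdnot: 1/notExp2(x) == notExp2(-x)
max(abs(1/notExp2(xx, d, b) - notExp2(-xx, d, b)))        # ~ 0
## notLog2 inverts notExp2 on the central interval |x| <= d*pi/2
xc <- seq(-d*pi/2, d*pi/2, length = 101)
max(abs(notLog2(notExp2(xc, d, b), d, b) - xc))           # ~ 0
## -------------------------------------------------------------------------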
#### pdMat class definitions, to enable tensor product smooths to be employed with gamm()
......@@ -63,9 +85,6 @@ pdConstruct.pdTens <-
{
val <- NextMethod()
  if (length(val) == 0) { # uninitialized object
# if ((ncol <- length(Names(val))) > 0) {
# attr(val, "ncol") <- ncol
# }
class(val) <- c("pdTens","pdMat")
return(val)
}
......@@ -83,9 +102,8 @@ pdConstruct.pdTens <-
## the following seems to be the only way to tell which.
if (summary(mod2)$r.sq>summary(mod1)$r.sq) mod1<-mod2
value <- coef(mod1)
# if (sum(value<=0)) warning("sp hits lower limit")
value[value <=0] <- .Machine$double.eps * mean(as.numeric(lapply(S,function(x) max(abs(x)))))
value <- notLog(value)
value <- notLog2(value)
attributes(value) <- attributes(val)[names(attributes(val)) != "dim"]
class(value) <- c("pdTens", "pdMat")
return(value)
......@@ -108,12 +126,9 @@ pdFactor.pdTens <- function(object)
{ sp <- as.vector(object)
m <- length(sp)
S <- attr(formula(object),"S")
value <- S[[1]]*notExp(sp[1])
if (m>1) for (i in 2:m) value <- value + notExp(sp[i])*S[[i]]
value <- S[[1]]*notExp2(sp[1])
if (m>1) for (i in 2:m) value <- value + notExp2(sp[i])*S[[i]]
if (sum(is.na(value))>0) warning("NA's in pdTens factor")
#### EXPERIMENTAL
# value<-solve(value,tol=0)
###
value <- (value+t(value))/2
c(t(mroot(value,rank=nrow(value))))
}
......@@ -132,16 +147,13 @@ pdMatrix.pdTens <-
sp <- as.vector(object)
m <- length(sp)
S <- attr(formula(object),"S")
value <- S[[1]]*notExp(sp[1])
if (m>1) for (i in 2:m) value <- value + notExp(sp[i])*S[[i]]
#### EXPERIMENTAL
# value<-solve(value,tol=0)
###
value <- S[[1]]*notExp2(sp[1])
if (m>1) for (i in 2:m) value <- value + notExp2(sp[i])*S[[i]]
value <- (value + t(value))/2 # ensure symmetry
if (sum(is.na(value))>0) warning("NA's in pdTens matrix")
if (factor) {
value <- t(mroot(value,rank=nrow(value)))
# attr(value, "logDet") <- sum(log(diag(value)))
}
dimnames(value) <- attr(object, "Dimnames")
value
......@@ -154,7 +166,7 @@ coef.pdTens <-
{
if (unconstrained) NextMethod()
else {
val <- notExp(as.vector(object))
val <- notExp2(as.vector(object))
names(val) <- paste("sp.",1:length(val), sep ="")
val
}
......@@ -163,10 +175,6 @@ coef.pdTens <-
summary.pdTens <-
function(object, structName = "Tensor product smooth term", ...)
{
# summary.pdMat(object, structName, noCorrelation = TRUE)
## ... summary.pdMat is not exported in the nlme NAMESPACE file, so....
NextMethod(object, structName, noCorrelation=TRUE)
}
......@@ -175,7 +183,7 @@ summary.pdTens <-
### pdIdnot: multiple of the identity matrix - the parameter is
### the notLog of the multiple. This is directly modified form
### the notLog2 of the multiple. This is directly modified from
### Pinheiro and Bates pdIdent class.
####* Constructor
......@@ -183,7 +191,7 @@ summary.pdTens <-
pdIdnot <-
## Constructor for the pdIdnot class
function(value = numeric(0), form = NULL, nam = NULL, data = sys.frame(sys.parent()))
{
{ #cat(" pdIdnot ")
object <- numeric(0)
class(object) <- c("pdIdnot", "pdMat")
pdConstruct(object, value, form, nam, data)
......@@ -201,7 +209,8 @@ corMatrix.pdIdnot <-
stop(paste("Cannot extract the matrix with uninitialized dimensions"))
}
val <- diag(Ncol)
attr(val, "stdDev") <- rep(notExp(as.vector(object)), Ncol)
## REMOVE sqrt() to revert ...
attr(val, "stdDev") <- rep(sqrt(notExp2(as.vector(object))), Ncol)
if (length(nm <- Names(object)) == 0) {
nm <- paste("V", 1:len, sep = "")
dimnames(val) <- list(nm, nm)
......@@ -213,7 +222,7 @@ corMatrix.pdIdnot <-
pdConstruct.pdIdnot <-
function(object, value = numeric(0), form = formula(object),
nam = Names(object), data = sys.frame(sys.parent()), ...)
{
{ #cat(" pdConstruct.pdIdnot ")
val <- NextMethod()
if (length(val) == 0) { # uninitialized object
if ((ncol <- length(Names(val))) > 0) {
......@@ -222,7 +231,8 @@ pdConstruct.pdIdnot <-
return(val)
}
if (is.matrix(val)) {
value <- notLog(sqrt(mean(diag(crossprod(val)))))
# value <- notLog2(sqrt(mean(diag(crossprod(val)))))
value <- notLog2(mean(diag(crossprod(val)))) ## REPLACE by above to revert
attributes(value) <- attributes(val)[names(attributes(val)) != "dim"]
attr(value, "ncol") <- dim(val)[2]
class(value) <- c("pdIdnot", "pdMat")
......@@ -243,13 +253,15 @@ pdConstruct.pdIdnot <-
pdFactor.pdIdnot <-
function(object)
{
notExp(as.vector(object)) * diag(attr(object, "ncol"))
{ ## UNCOMMENT first line, comment 2nd to revert
# notExp2(as.vector(object)) * diag(attr(object, "ncol"))
#cat(" pdFactor.pdIdnot ")
sqrt(notExp2(as.vector(object))) * diag(attr(object, "ncol"))
}
pdMatrix.pdIdnot <-
function(object, factor = FALSE)
{
{ #cat(" pdMatrix.pdIdnot ")
if (!isInitialized(object)) {
stop("Cannot extract the matrix from an uninitialized pdMat object")
}
......@@ -257,14 +269,16 @@ pdMatrix.pdIdnot <-
stop(paste("Cannot extract the matrix with uninitialized dimensions"))
}
value <- diag(Ncol)
## REPLACE by #1,#2,#3 to revert
if (factor) {
value <- notExp(as.vector(object)) * value
attr(value, "logDet") <- Ncol * log(notExp(as.vector(object)))
#1 value <- notExp2(as.vector(object)) * value
#2 attr(value, "logDet") <- Ncol * log(notExp2(as.vector(object)))
value <- sqrt(notExp2(as.vector(object))) * value
attr(value, "logDet") <- Ncol * log(notExp2(as.vector(object)))/2
} else {
value <- notExp(as.vector(object))^2 * value
#3 value <- notExp2(as.vector(object))^2 * value
value <- notExp2(as.vector(object)) * value
}
dimnames(value) <- attr(object, "Dimnames")
value
......@@ -274,9 +288,9 @@ pdMatrix.pdIdnot <-
coef.pdIdnot <-
function(object, unconstrained = TRUE, ...)
{
{ #cat(" coef.pdIdnot ")
if (unconstrained) NextMethod()
else structure(notExp(as.vector(object)),
else structure(notExp2(as.vector(object)),
names = c(paste("sd(", deparse(formula(object)[[2]],backtick=TRUE),")",sep = "")))
}
......@@ -292,8 +306,9 @@ Dim.pdIdnot <-
logDet.pdIdnot <-
function(object, ...)
{
attr(object, "ncol") * log(notExp(as.vector(object)))
{ ## REMOVE /2 to revert ....
attr(object, "ncol") * log(notExp2(as.vector(object)))/2
}
solve.pdIdnot <-
......@@ -308,7 +323,7 @@ solve.pdIdnot <-
summary.pdIdnot <-
function(object, structName = "Multiple of an Identity", ...)
{
{ #cat(" summary.pdIdnot ")
# summary.pdMat(object, structName, noCorrelation = TRUE)
## ... summary.pdMat is not exported in the nlme NAMESPACE file, so....
......@@ -850,12 +865,13 @@ new.name <- function(proposed,old.names)
gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=list(),weights=NULL,
subset=NULL,na.action,knots=NULL,control=lmeControl(niterEM=3),niterPQL=20,verbosePQL=TRUE,...)
subset=NULL,na.action,knots=NULL,control=lmeControl(niterEM=0),niterPQL=20,verbosePQL=TRUE,...)
## NOTE: niterEM modified after changed notLog parameterization - old version
## needed niterEM=3. 10/8/05.
# Routine to fit a GAMM to some data. Fixed and smooth terms are defined in the formula, but the wiggly
# parts of the smooth terms are treated as random effects. The one-sided formula random defines additional
# random terms. correlation describes the correlation structure. This routine is basically an interface
# between the basis constructors provided in mgcv and the glmmPQL routine used to estimate the model.
# NOTE: need to fill out the gam object properly
{ if (!require(nlme)) stop("gamm() requires package nlme to be installed")
......@@ -967,6 +983,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
##ret$lme<-lme(fixed.formula,random=rand,data=mf,correlation=correlation,control=control)
} else
{ ## Again, construction is a work around for nlme 3-1.52
if (verbosePQL) cat("\n Maximum number of PQL iterations: ",niterPQL,"\n")
eval(parse(text=paste("ret$lme<-glmmPQL(",deparse(fixed.formula),
",random=rand,data=strip.offset(mf),family=family,correlation=correlation,control=control,",
"weights=weights,niter=niterPQL,verbose=verbosePQL)",sep="")))
......@@ -1015,8 +1032,8 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
if (G$m>0) for (i in 1:G$m) # var.param in reverse term order, but forward order within terms!!
{ n.sp <- length(object$smooth[[i]]$S) # number of s.p.s for this term
if (inherits(object$smooth[[i]],"tensor.smooth"))#&&n.sp>1)
object$sp[k:(k+n.sp-1)] <- notExp(var.param[(n.v-n.sp+1):n.v])
else object$sp[k:(k+n.sp-1)] <- 1/notExp(var.param[(n.v-n.sp+1):n.v])^2
object$sp[k:(k+n.sp-1)] <- notExp2(var.param[(n.v-n.sp+1):n.v])
else object$sp[k:(k+n.sp-1)] <- 1/notExp2(var.param[(n.v-n.sp+1):n.v]) ## ^2 <- reinstate to revert
k <- k + n.sp
n.v <- n.v - n.sp
}
......
1.3-6
* pdIdnot class changed so that parameters are variances not standard
deviations - this makes for greater consistency with pdTens class, and
means that limits on notLog2 parameterization should mean the same thing
for both classes.
* niterEM set to 0 in lme calls. This is because EM steps in lme are not
set up to deal properly with user defined pdMat classes.
1.3-5
* Improvements to anova and summary functions by Henric Nilsson
incorporated. Functions are now closer to glm equivalents, and
printing is more informative. See ?anova.gam and ?summary.gam.
* nlme 3.1-62 changed the optimizer underlying lme, so that indefinite
  likelihoods cause problems. See ?notExp2 for the workaround.
- niterEM now reset to 25, since parameterization prevents parameters
wandering to +/- infinity (this is important as starting values for
Newton steps are now more critical, since reparameterization
introduces new local minima).
* smoothCon modified to rescale penalty coefficient matrices to have
similar `size' to X'X for each term. This is to try and ensure that
gamm is reasonably scale invariant in its behaviour, given the
  notExp2 re-parameterization.
* magic dropped dimensions of an array inappropriately - fixed.
* gam now checks that model does not have more coefficients than data.
1.3-4
* inst/CITATION file added. Some .Rd fixes
30/6/2005 1.3-3
* te() smooths were not always estimated correctly by gamm(): invariance
......
......@@ -12,7 +12,7 @@ are usually approximate, unless the models are un-penalized.
}
\usage{
anova.gam(object, ..., dispersion = NULL, test = NULL)
print.anova.gam(x,...)
print.anova.gam(x, digits = max(3, getOption("digits") - 3),...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
......@@ -21,6 +21,7 @@ print.anova.gam(x,...)
\item{dispersion}{ a value for the dispersion parameter: not normally used.}
\item{test}{what sort of test to perform for a multi-model call. One of
\code{"Chisq"}, \code{"F"} or \code{"Cp"}. }
\item{digits}{number of digits to use when printing output.}
}
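As a hedged illustration of the new \code{digits} argument (the data and model
below are invented for this sketch, not taken from the package examples):

library(mgcv)
set.seed(1)
dat <- data.frame(x = runif(100))
dat$y <- sin(2*pi*dat$x) + rnorm(100)*0.3
b <- gam(y ~ s(x), data = dat)
print(anova(b), digits = 4)   # single-model call, printed to 4 significant digits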
\details{ If more than one fitted model is provided then \code{anova.glm} is
used. If only one model is provided then the significance of each model term
......@@ -52,7 +53,8 @@ which is in fact an object returned from \code{\link{summary.gam}}.
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
\author{ Simon N. Wood \email{simon.wood@r-project.org} with substantial
improvements by Henric Nilsson.}
\section{WARNING }{ P-values may be under-estimates, as a result of ignoring
smoothing parameter uncertainty.
......
......@@ -29,7 +29,7 @@ To use this function effectively it helps to be quite familiar with the use of
\usage{
gamm(formula,random=NULL,correlation=NULL,family=gaussian(),
data=list(),weights=NULL,subset=NULL,na.action,knots=NULL,
control=lmeControl(niterEM=3),niterPQL=20,verbosePQL=TRUE,...)
control=lmeControl(niterEM=0),niterPQL=20,verbosePQL=TRUE,...)
}
\arguments{
......@@ -93,8 +93,10 @@ numbers of knots, unless they share a covariate.
}
\item{control}{A list of fit control parameters for \code{\link[nlme]{lme}} returned by
\code{\link[nlme]{lmeControl}}. Note the default setting for the number of EM iterations
used by \code{lme}: high settings tend to lead to numerical problems because variance components
for smooth terms can legitimately be non-finite.}
used by \code{lme}: smooths are set up using custom \code{pdMat} classes,
which are currently not supported by the EM iteration code. Only increase this
number if you want to perturb the starting values used in model fitting
(usually to worse values!).}
\item{niterPQL}{Maximum number of PQL iterations (if any).}
......@@ -103,6 +105,7 @@ for smooth terms can legitimately be non-finite.}
\item{...}{further arguments for passing on e.g. to \code{lme}}
}
%- maybe also `usage' for other objects documented here.
\details{ The Bayesian model of spline smoothing introduced by Wahba (1983) and Silverman (1985) opens
up the possibility of estimating the degree of smoothness of terms in a generalized additive model
as variances of the wiggly components of the smooth terms treated as random effects. Several authors
......@@ -118,7 +121,16 @@ Some brief details of how GAMs are represented as mixed models and estimated usi
The bases used to represent smooth terms are the same as those used in \code{\link{gam}}.
}
In the event of \code{lme} convergence failures, consider
modifying the \code{mgcv.vc.logrange} option (via \code{\link{options}}): reducing it helps to remove
indefiniteness in the likelihood, if that is the problem, but too large a
reduction can force over or undersmoothing. See \code{\link{notExp2}} for more
information on this option. Failing that, you can try increasing the
\code{niterEM} option in \code{control}: this will perturb the starting values
used in fitting, but usually to values with lower likelihood! Note that this
version of \code{gamm} works best with R 2.2.0 or above and \code{nlme}, 3.1-62 and above,
since these use an improved optimizer.
}
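A hedged sketch of the workaround just described (the model formula and data
name are placeholders, not from the package examples):

library(mgcv)
getOption("mgcv.vc.logrange")            # range currently set by mgcv
options(mgcv.vc.logrange = 15)           # illustrative reduced range
## b <- gamm(y ~ s(x) + s(z), data = dat)           # retry the fit
## if it still fails, perturbing the starting values may help:
## b <- gamm(y ~ s(x) + s(z), data = dat,
##           control = lmeControl(niterEM = 3))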
\value{ Returns a list with two items:
......@@ -155,7 +167,7 @@ Wahba, G. (1983) Bayesian confidence intervals for the cross validated smoothing
JRSSB 45:133-150
Wood, S.N. (2004a) Stable and efficient multiple smoothing parameter estimation for
generalized additive models. Journal of the American Statistical Association. 99:637-686
generalized additive models. Journal of the American Statistical Association. 99:673-686
Wood, S.N. (2003) Thin plate regression splines. J.R.Statist.Soc.B 65(1):95-114
......@@ -173,6 +185,10 @@ Wang, Y. (1998) Mixed effects smoothing spline analysis of variance. J.R. Statis
\section{WARNINGS }{
This version of \code{gamm} works best with \code{nlme} 3.1-62 and above and R
2.2.0 and above. This is because it is designed to work with the optimizers
used from these versions onwards, rather than the earlier default optimizers.
The routine will be very slow and memory intensive if correlation structures
are used for very large groups of data, e.g. attempting to run the
spatial example in the examples section with many 1000's of data is definitely not
......@@ -185,8 +201,8 @@ smoothing parameter, or a random effect specified in argument \code{random}.
Models like \code{s(z)+s(x)+s(x,z)} are not currently supported.
\code{gamm} is not as numerically stable as \code{gam}: an \code{lme} call will occasionally fail. Experimenting with
\code{niterEM} in the \code{control} argument can sometimes help.
\code{gamm} is not as numerically stable as \code{gam}: an \code{lme} call
will occasionally fail. See details section for suggestions.
\code{gamm} is usually much slower than \code{gam}, and on some platforms you may need to
increase the memory available to R in order to use it with large data sets
......@@ -199,6 +215,10 @@ Note that the \code{gam} object part of the returned object is not complete in
the sense of having all the elements defined in \code{\link{gamObject}} and
does not inherit from \code{glm}: hence e.g. multi-model \code{anova} calls will not work.
The parameterization used for the smoothing parameters in \code{gamm} bounds
them above and below by an effective infinity and effective zero. See
\code{\link{notExp2}} for details of how to change this.
}
\seealso{\code{\link{te}}, \code{\link{s}}, \code{\link{predict.gam}},
......
......@@ -17,8 +17,10 @@ looks like.
\code{notLog} is the inverse function of \code{notExp}.
The major use of these functions is to provide more robust \code{pdMat} classes for \code{lme} for use by
\code{\link{gamm}}.
The major use of these functions was originally to provide more robust
\code{pdMat} classes for \code{lme} for use by \code{\link{gamm}}. Currently
the \code{\link{notExp2}} and \code{\link{notLog2}} functions are used in
their place, as a result of changes to the nlme optimization routines.
}
\usage{
......
\name{notExp2}
\alias{notExp2}
\alias{notLog2}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{ Alternative to log parameterization for variance components}
\description{ \code{notLog2} and \code{notExp2} are alternatives to \code{log}
and \code{exp} or \code{\link{notLog}} and \code{\link{notExp}} for
re-parameterization of variance parameters. They are used by the
\code{\link{pdTens}} and \code{\link{pdIdnot}} classes which in turn implement
smooths for \code{\link{gamm}}.
The functions are typically used to ensure that smoothing parameters are
positive, but the \code{notExp2} is not monotonic: rather it cycles between
`effective zero' and `effective infinity' as its argument changes. The
\code{notLog2} is the inverse function of the \code{notExp2} only over an
interval centered on zero.
Parameterizations using these functions ensure that estimated smoothing
parameters remain positive, but also help to ensure that the likelihood is
never indefinite: once a working parameter pushes a smoothing parameter below
`effective zero' or above `effective infinity' the cyclic nature of the
\code{notExp2} causes the likelihood to decrease, where otherwise it might
simply have flattened.
This parameterization is really just a numerical trick, in order to get
\code{lme} to fit \code{gamm} models, without failing due to indefiniteness.
Note in particular that asymptotic results on the likelihood/REML criterion are
not invalidated by the trick,
unless parameter estimates end up close to the effective zero or effective
infinity: but if this is the case then the asymptotics would also have been invalid
for a conventional monotonic parameterization.
This reparameterization was made necessary by some modifications to the
underlying optimization method in \code{lme} introduced in nlme 3.1-62. It is
possible that future releases will return to the \code{\link{notExp}} parameterization.
Note that you can reset `effective zero' and `effective infinity': see below.
}
\usage{
notExp2(x,d=.Options$mgcv.vc.logrange,b=1/d)
notLog2(x,d=.Options$mgcv.vc.logrange,b=1/d)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{x}{Argument array of real numbers (\code{notExp2}) or positive real numbers (\code{notLog2}).}
\item{d}{the range of \code{notExp2} runs from \code{exp(-d)} to
\code{exp(d)}. To change the range used by \code{gamm} reset
\code{mgcv.vc.logrange} using \code{\link{options}}.}
\item{b}{determines the period of the cycle of \code{notExp2}.}
}
\value{ An array of function values evaluated at the supplied argument values.}
\author{ Simon N. Wood \email{simon.wood@r-project.org}}
\references{
\url{http://www.stats.gla.ac.uk/~simon/}
}
\seealso{ \code{\link{pdTens}}, \code{\link{pdIdnot}}, \code{\link{gamm}}}
\examples{
## Illustrate the notExp2 function:
x <- seq(-50,50,length=1000)
op <- par(mfrow=c(2,2))
plot(x,notExp2(x),type="l")
lines(x,exp(x),col=2)
plot(x,log(notExp2(x)),type="l")
lines(x,log(exp(x)),col=2) # redundancy intended
x <- x/4
plot(x,notExp2(x),type="l")
lines(x,exp(x),col=2)
plot(x,log(notExp2(x)),type="l")
lines(x,log(exp(x)),col=2) # redundancy intended
par(op)
}
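A further hedged illustration (not part of the original example code): the
attainable range of \code{notExp2} is \code{exp(-d)} to \code{exp(d)}, and
resetting \code{mgcv.vc.logrange} changes it.

old <- getOption("mgcv.vc.logrange")
options(mgcv.vc.logrange = 10)
range(notExp2(seq(-100, 100, length = 1000)))  # roughly exp(-10) to exp(10)
options(mgcv.vc.logrange = old)                # restore the previous setting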
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..
......@@ -13,8 +13,10 @@
\title{Overflow proof pdMat class for multiples of the identity matrix}
\description{ This set of functions is a modification of the \code{pdMat} class \code{pdIdent}
from library \code{nlme}. The modification is to replace the log parameterization used in \code{pdMat}
with a \code{\link{notLog}} parameterization, since the latter is much less susceptible to overflow
and underflow of the parameters on the original scale. The functions are particularly useful for
with a \code{\link{notLog2}} parameterization, since the latter avoids
indefiniteness in the likelihood and associated convergence problems: the
parameters also relate to variances rather than standard deviations, for
consistency with the \code{\link{pdTens}} class. The functions are particularly useful for
working with Generalized Additive Mixed Models where variance parameters/smoothing parameters can
be very large or very small, so that overflow or underflow can be a problem.
......@@ -56,7 +58,7 @@ The \code{nlme} source code.
}
\seealso{ \code{\link{te}}, \code{\link{pdTens}}, \code{\link{notLog}}, \code{\link{gamm}}}
\seealso{ \code{\link{te}}, \code{\link{pdTens}}, \code{\link{notLog2}}, \code{\link{gamm}}}
\examples{
# see gamm
......
......@@ -14,7 +14,7 @@ parameters. In the mixed model formulation the penalty matrix is the inverse of
the random effects of a term, and the smoothing parameters (times a half) are variance parameters to be estimated.
It's not
possible to transform the problem to make the required random effects covariance matrix look like one of the standard
\code{pdMat} classes: hence the need for the \code{pdTens} class. A \code{\link{notLog}} parameterization ensures that
\code{pdMat} classes: hence the need for the \code{pdTens} class. A \code{\link{notLog2}} parameterization ensures that
the parameters are positive.
These functions (\code{pdTens}, \code{pdConstruct.pdTens},
......
......@@ -12,7 +12,7 @@ constraints to be absorbed into the parameterization of each term, if required.
}
\usage{
smoothCon(object,data,knots,absorb.cons=FALSE)
smoothCon(object,data,knots,absorb.cons=FALSE,scale.penalty=TRUE)
PredictMat(object,data)
}
%- maybe also `usage' for other objects documented here.
......@@ -24,6 +24,9 @@ evaluated.}
supplied for basis construction.}
\item{absorb.cons}{Set to \code{TRUE} in order to have identifiability
constraints absorbed into the basis.}
\item{scale.penalty}{should the penalty coefficient matrix be scaled to have
approximately the same `size' as the inner product of the term's model matrix
with itself? This can improve the performance of \code{\link{gamm}} fitting.}
}
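A hedged sketch of a call using the new \code{scale.penalty} argument (the
data frame is invented purely for illustration):

library(mgcv)
dat <- data.frame(x = runif(50))
## construct a smooth of x with its penalty matrix rescaled to have roughly
## the same `size' as the inner product of the model matrix with itself
sm <- smoothCon(s(x), data = dat, knots = NULL,
                absorb.cons = TRUE, scale.penalty = TRUE)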
\value{ From \code{smoothCon} a \code{smooth} object returned by the
......
......@@ -7,16 +7,28 @@
summaries from it.
}
\usage{
summary.gam(object,freq=TRUE,...)
print.summary.gam(x,...)
summary.gam(object, dispersion=NULL, freq=TRUE, ...)
print.summary.gam(x,digits = max(3, getOption("digits") - 3),
signif.stars = getOption("show.signif.stars"),...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{object}{ a fitted \code{gam} object as produced by \code{gam()}.}
\item{x}{a \code{summary.gam} object produced by \code{summary.gam()}.}
\item{dispersion}{A known dispersion parameter. \code{NULL} to use estimate or
default (e.g. 1 for Poisson).}
\item{freq}{By default p-values for individual terms are calculated using the frequentist estimated
covariance matrix of the parameter estimators. If this is set to FALSE then
the Bayesian covariance matrix of the parameters is used instead. See details. }
\item{digits}{controls number of digits printed in output.}
\item{signif.stars}{Should significance stars be printed alongside output?}
\item{...}{ other arguments.}
}
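As a hedged illustration of the new \code{dispersion} argument (data simulated
purely for this sketch):

library(mgcv)
set.seed(2)
dat <- data.frame(x = runif(200))
dat$y <- rpois(200, exp(sin(2*pi*dat$x)))
b <- gam(y ~ s(x), family = poisson, data = dat)
summary(b)                   # dispersion fixed at 1 for Poisson
summary(b, dispersion = 2)   # supply a known dispersion instead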
......@@ -40,11 +52,18 @@ interpret and often have much worse frequentist performance than the default p-v
}
\value{\code{summary.gam} produces a list of summary information for a fitted \code{gam} object.
\item{p.coeff}{is an array of estimates of the strictly parametric model coefficients.}
\item{p.t}{is an array of the \code{p.coeff}'s divided by their standard errors.}
\item{p.pv}{is an array of p-values for the null hypothesis that the corresponding parameter is zero.
Calculated with reference to the t distribution with the estimated residual degrees of freedom for the model fit.}
Calculated with reference to the t distribution with the estimated residual
degrees of freedom for the model fit if the dispersion parameter has been
estimated, and the standard normal if not.}
\item{m}{The number of smooth terms in the model.}
\item{chi.sq}{An array of test statistics for assessing the significance of
model smooth terms. If \eqn{ {\bf p}_i}{p_i}
is the parameter vector for the ith smooth term, and this term has estimated
......@@ -52,39 +71,68 @@ covariance matrix \eqn{ {\bf V}_i}{V_i} then the
statistic is \eqn{ {\bf p}_i^\prime {\bf V}_i^{k-} {\bf
p}_i}{p_i'V_i^{k-}p_i}, where \eqn{ {\bf V}^{k-}_i}{V_i^{k-}} is the rank k
pseudo-inverse of \eqn{ {\bf V_i}}{V_i}, and k is the estimated rank of \eqn{ {\bf V_i}}{V_i}.}
\item{s.pv}{An array of approximate p-values for the null hypotheses that each
smooth term is zero. Be warned, these are only approximate. In the case in
which UBRE has been used, they are obtained by comparing the chi.sq statistic given above to the
smooth term is zero. Be warned, these are only approximate. In the case of
known dispersion parameter, they are obtained by comparing the chi.sq statistic given above to the
chi-squared distribution with k degrees of freedom, where k is the estimated
rank of \eqn{ {\bf V_i}}{V_i}. In the GCV case (in
which the scale parameter will have been estimated) the statistic is compared
rank of \eqn{ {\bf V_i}}{V_i}. If the dispersion parameter is unknown (in
which case it will have been estimated) the statistic is compared
to an F distribution with k upper d.f. and lower d.f. given by the residual degrees of freedom for the model.
Typically the p-values will be somewhat too low, because they are conditional
on the smoothing parameters, which are usually uncertain.
}
\item{se}{array of standard error estimates for all parameter estimates.}
\item{r.sq}{The adjusted r-squared for the model. Defined as the proportion of variance explained, where original variance and
residual variance are both estimated using unbiased estimators. This quantity can be negative if your model is worse than a one
parameter constant model, and can be higher for the smaller of two nested models! Note that proportion null deviance
explained is probably more appropriate for non-normal errors.}
\item{dev.expl}{The proportion of the null deviance explained by the model.}
\item{edf}{array of estimated degrees of freedom for the model terms.}
\item{residual.df}{estimated residual degrees of freedom.}
\item{n}{number of data.}
\item{gcv}{minimized GCV score for the model, if GCV used.}
\item{ubre}{minimized UBRE score for the model, if UBRE used.}
\item{scale}{estimated (or given) scale parameter.}
\item{family}{the family used.}
\item{formula}{the original GAM formula.}
\item{dispersion}{the scale parameter.}
\item{pTerms.df}{the degrees of freedom associated with each parametric term
(excluding the constant).}
\item{pTerms.chi.sq}{a Wald statistic for testing the null hypothesis that
each parametric term is zero.}
\item{pTerms.pv}{p-values associated with the tests that each term is
zero. For penalized fits these are approximate, being conditional on the smoothing
parameters. The reference distribution is an appropriate chi-squared when the
scale parameter is known, and is based on an F when it is not.}
\item{cov.unscaled}{The estimated covariance matrix of the parameters (or
estimators if \code{freq=TRUE}), divided
by scale parameter.}
\item{cov.scaled}{The estimated covariance matrix of the parameters
(estimators if \code{freq=TRUE}).}
\item{p.table}{significance table for parameters}
\item{s.table}{significance table for smooths}
\item