Commit 49710d4d authored by Dirk Eddelbuettel

Import Upstream version 1.6-0

parent 043b3207
Package: mgcv
Version: 1.5-6
Version: 1.6-0
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
......@@ -8,10 +8,10 @@ Description: Routines for GAMs and other generalized ridge regression
UBRE/AIC. Also GAMMs by REML or PQL. Includes a gam() function.
Priority: recommended
Depends: R (>= 2.3.0)
Imports: graphics, stats, nlme
Suggests: nlme (>= 3.1-64), splines
Imports: graphics, stats, nlme, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix
LazyLoad: yes
License: GPL (>= 2)
Packaged: 2009-09-11 09:31:19 UTC; simon
Packaged: 2009-11-18 15:17:40 UTC; simon
Repository: CRAN
Date/Publication: 2009-09-12 12:18:28
Date/Publication: 2009-11-18 17:00:51
useDynLib(mgcv, .registration = TRUE, .fixes = "C_")
export(anova.gam, cSplineDes,
export(anova.gam, bam, cSplineDes,
exclude.too.far,extract.lme.cov, extract.lme.cov2,
formXtViX, full.score, formula.gam,fixDependence,fix.family.link,
fix.family.var, fix.family.ls, gam, gam2derivative,
......@@ -10,7 +10,7 @@ export(anova.gam, cSplineDes,
interpret.gam,
gam.side,
get.var,ldTweedie,
initial.sp,logLik.gam,
initial.sp,logLik.gam,ls.size,
magic, magic.post.proc, mgcv, mgcv.control, model.matrix.gam,
mono.con, mroot, negbin, new.name,
notExp,notExp2,notLog,notLog2,pcls,null.space.dimension,
......@@ -25,7 +25,7 @@ export(anova.gam, cSplineDes,
Predict.matrix.tprs.smooth,
Predict.matrix.ts.smooth,
Predict.matrix.pspline.smooth,
residuals.gam, s,
residuals.gam,rig, s,
smoothCon,smooth.construct,smooth.construct2,
smooth.construct.cc.smooth.spec,
smooth.construct.cp.smooth.spec,
......@@ -49,6 +49,8 @@ importFrom(stats,.checkMFClasses,.getXlevels, as.formula, anova,anova.glmlist,co
predict,printCoefmat ,quantile,qqnorm, reformulate,residuals,rnorm,runif,termplot,terms,terms.formula, uniroot,
var,vcov)
importFrom(nlme,Dim,corMatrix,logDet,pdConstruct,pdFactor,pdMatrix)
importFrom(Matrix,t,mean,colMeans,colSums)
S3method(anova, gam)
S3method(influence, gam)
S3method(cooks.distance, gam)
......
This diff is collapsed.
This diff is collapsed.
......@@ -1032,7 +1032,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
object<-list(model=mf,formula=formula,smooth=G$smooth,nsdf=G$nsdf,family=family,
df.null=nrow(G$X),y=G$y,terms=gam.terms,pterms=pTerms,xlevels=G$xlevels,
contrasts=G$contrasts,assign=G$assign,na.action=attr(mf,"na.action"),
cmX=G$cmX,var.summary=G$var.summary)
cmX=G$cmX,var.summary=G$var.summary,scale.estimated=TRUE)
# Transform parameters back to the original space....
bf<-as.numeric(ret$lme$coefficients$fixed)
br<-as.numeric(unlist(ret$lme$coefficients$random))
......
This diff is collapsed.
......@@ -1256,10 +1256,12 @@ smoothCon <- function(object,data,knots,absorb.cons=FALSE,scale.penalty=TRUE,n=n
if (sum(sm$X==0)>.1*sum(sm$X!=0)) { ## treat term as sparse
if (sparse.cons==1) {
xsd <- apply(sm$X,2,FUN=sd)
if (sum(xsd==0)) ## are and columns constant?
if (sum(xsd==0)) ## are any columns constant?
sm$C <- ((1:length(xsd))[xsd==0])[1] ## index of coef to set to zero
else {
xz <- colSums(sm$X==0) ## find number of zeroes per column
## xz <- colSums(sm$X==0)
## find number of zeroes per column (without big memory footprint)...
xz <- apply(sm$X,2,FUN=function(x) {sum(x==0)})
sm$C <- ((1:length(xz))[xz==min(xz)])[1] ## index of coef to set to zero
}
} else if (sparse.cons==2) {
......
** denotes quite substantial/important changes
*** denotes really big changes
1.6-0
*** Routine `bam' added for fitting GAMs to very large datasets.
See ?bam
** p-value method tweaked again. Reference DoF for testing now
defaults to the alternative EDF estimate (based on 2F - FF where
F = (X'WX+S)^{-1}X'WX). `magic.post.proc' and `gam.fit3.post.proc'
changed to provide this. p-values still a bit too small, but only
slightly so, if `method="ML"' is the smoothness selector.
* bad bug in `get.null.coef' could cause fit failure as a result of
initial null coefs predicting infinite deviance.
* REML/ML convergence could be response scale sensitive, because of
  inappropriate convergence testing scaling in newton and bfgs -
fixed.
* Slight fix to REML (not ML) score calculation in gam.fit3 -
Mp/2*log(2*pi*scale) was missing from REML score, where Mp is
total null space dimension for model.
* `summary.gam' bug fix: REML/ML models were always treated as if
scale parameter had been estimated. gamObject should now contain
  `scale.estimated' indicating whether or not the scale parameter was estimated.
* some modifications to smoothCon and gam.setup to allow smooth
constructors to return Matrix style sparse model matrices and
penalty matrices.
* fixed misplaced bracket in print.mgcv.version, called on attachment.
* added utility function `ls.size' to give memory usage for elements
of a list.
* added function `rig' to generate inverse Gaussian random deviates.
1.5-6
* "ts" and "cs" modified so that zero eigen values of penalty
......
\name{bam}
\alias{bam}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generalized additive models for very large datasets}
\description{ Fits a generalized additive model (GAM) to a very large
data set, the term `GAM' being taken to include any quadratically penalized GLM.
The degree of smoothness of model terms is estimated as part of
fitting. In use the function is much like \code{\link{gam}}, except that the numerical methods
are designed for datasets containing upwards of several tens of thousands of observations. The advantage
of \code{bam} is a much lower memory footprint than \code{\link{gam}}, but it can also be much faster
for large datasets.
}
\usage{
bam(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,
na.action, offset=NULL,method="REML",control=gam.control(),
scale=0,gamma=1,knots=NULL,sp=NULL,min.sp=NULL,paraPen=NULL,
chunk.size=10000,...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{formula}{ A GAM formula (see \code{\link{formula.gam}} and also \code{\link{gam.models}}).
This is exactly like the formula for a GLM except that smooth terms, \code{s} and \code{te} can be added
to the right hand side to specify that the linear predictor depends on smooth functions of predictors
(or linear functionals of these).
}
\item{family}{
This is a family object specifying the distribution and link to use in
fitting etc. See \code{\link{glm}} and \code{\link{family}} for more
details. A negative binomial family is provided: see \code{\link{negbin}}, but
only the known theta case is supported by \code{bam}.
}
\item{data}{ A data frame or list containing the model response variable and
covariates required by the formula. By default the variables are taken
from \code{environment(formula)}: typically the environment from
which \code{bam} is called.}
\item{weights}{ prior weights on the data.}
\item{subset}{ an optional vector specifying a subset of observations to be
used in the fitting process.}
\item{na.action}{ a function which indicates what should happen when the data
contain `NA's. The default is set by the `na.action' setting
of `options', and is `na.fail' if that is unset. The
``factory-fresh'' default is `na.omit'.}
\item{offset}{Can be used to supply a model offset for use in fitting. Note
that this offset will always be completely ignored when predicting, unlike an offset
included in \code{formula}: this conforms to the behaviour of
\code{lm} and \code{glm}.}
\item{method}{The smoothing parameter estimation method. \code{"GCV.Cp"} to use GCV for unknown scale parameter and
Mallows' Cp/UBRE/AIC for known scale. \code{"GACV.Cp"} is equivalent, but using GACV in place of GCV. \code{"REML"}
for REML estimation, including of unknown scale, \code{"P-REML"} for REML estimation, but using a Pearson estimate
of the scale. \code{"ML"} and \code{"P-ML"} are similar, but using maximum likelihood in place of REML. }
\item{control}{A list of fit control parameters returned by
\code{\link{gam.control}}.}
\item{scale}{ If this is positive then it is taken as the known scale parameter. Negative signals that the
scale parameter is unknown. 0 signals that the scale parameter is 1 for Poisson and binomial and unknown otherwise.
Note that (RE)ML methods can only work with scale parameter 1 for the Poisson and binomial cases.
}
\item{gamma}{It is sometimes useful to inflate the model degrees of
freedom in the GCV or UBRE/AIC score by a constant multiplier. This allows
such a multiplier to be supplied. }
\item{knots}{this is an optional list containing user specified knot values to be used for basis construction.
For most bases the user simply supplies the knots to be used, which must match up with the \code{k} value
supplied (note that the number of knots is not always just \code{k}).
See \code{\link{tprs}} for what happens in the \code{"tp"/"ts"} case.
Different terms can use different numbers of knots, unless they share a covariate.
}
\item{sp}{A vector of smoothing parameters can be provided here.
Smoothing parameters must be supplied in the order that the smooth terms appear in the model
formula. Negative elements indicate that the parameter should be estimated, and hence a mixture
of fixed and estimated parameters is possible. If smooths share smoothing parameters then \code{length(sp)}
must correspond to the number of underlying smoothing parameters.}
\item{min.sp}{Lower bounds can be supplied for the smoothing parameters. Note
that if this option is used then the smoothing parameters \code{full.sp}, in the
returned object, will need to be added to what is supplied here to get the
smoothing parameters actually multiplying the penalties. \code{length(min.sp)} should
always be the same as the total number of penalties (so it may be longer than \code{sp},
if smooths share smoothing parameters).}
\item{paraPen}{optional list specifying any penalties to be applied to parametric model terms.
\code{\link{gam.models}} explains more.}
\item{chunk.size}{The model matrix is created in chunks of this size, rather than ever being formed whole.}
\item{...}{further arguments for
passing on e.g. to \code{gam.fit} (such as \code{mustart}). }
}
\value{
An object of class \code{"gam"} as described in \code{\link{gamObject}}.
}
\details{ \code{bam} operates by first setting up the basis characteristics for the smooths, using a representative subsample
of the data. Then the model matrix is constructed in blocks using \code{\link{predict.gam}}. For each block the factor R,
from the QR decomposition of the whole model matrix, is updated, along with Q'y and the sum of squares of y. At the end of
block processing, fitting takes place, without the need to ever form the whole model matrix.
In the generalized case, the same trick is used with the weighted model matrix and weighted pseudodata, at each step of the PIRLS.
Smoothness selection is performed on the working model at each stage (performance oriented iteration), to maintain the
small memory footprint. This is easy to
justify in the case of GCV or Cp/UBRE/AIC based model selection, and also for REML/ML with known scale parameter. It is slightly
less well justified in the REML/ML unknown scale parameter case, but it seems unlikely that this case actually behaves any less well.
Note that POI is not as stable as the default nested iteration used with \code{\link{gam}}, but that for very large, information rich,
datasets, this is unlikely to matter much.
Note that it is possible to spend most of the computational time on basis evaluation, if an expensive basis is used. In practice
this means that the default \code{"tp"} basis should be avoided.
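
The following is a minimal plain-R sketch of the accumulation scheme described above, for the Gaussian
identity-link case with a single fixed penalty matrix \code{S} and smoothing parameter \code{lambda};
\code{get.block} is a hypothetical function returning the model matrix rows \code{X} and response
\code{y} for one block (the first block is assumed to have at least as many rows as columns). It only
illustrates the idea, and is not \code{bam}'s actual implementation.

block.pen.fit <- function(get.block, n.blocks, S, lambda) {
  R <- NULL; f <- NULL; yty <- 0
  for (b in 1:n.blocks) {
    blk <- get.block(b)                 ## hypothetical: list with model matrix rows X and response y
    qrx <- qr(rbind(R, blk$X))          ## QR of previous R factor stacked on the new rows
    p <- ncol(blk$X)
    R <- qr.R(qrx)                      ## updated R factor for all rows processed so far
    f <- qr.qty(qrx, c(f, blk$y))[1:p]  ## updated Q'y
    yty <- yty + sum(blk$y^2)           ## running sum of squares of y
  }
  ## fit using only R, Q'y and the sum of squares of y: X'X = R'R and X'y = R'Q'y,
  ## so the whole model matrix is never needed...
  XtX <- crossprod(R); Xty <- drop(crossprod(R, f))
  beta <- solve(XtX + lambda * S, Xty)
  rss <- yty - 2*sum(beta*Xty) + sum(beta*drop(crossprod(XtX, beta)))
  list(coef = beta, rss = rss, R = R)
}

In \code{bam} itself the corresponding weighted quantities are re-accumulated at each PIRLS step,
with smoothness selection applied to each working model, as described above.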
}
\references{
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
}
\section{WARNINGS }{
The routine will be slow if the default \code{"tp"} basis is used.
You must have more unique combinations of covariates than the model has total
parameters. (The total number of parameters is the sum of the basis dimensions plus the number of
non-spline terms, less the number of spline terms).
This routine is less stable than `gam' for the same dataset.
The negbin family is only supported for the *known theta* case.
}
\seealso{\code{\link{mgcv-package}}, \code{\link{gamObject}}, \code{\link{gam.models}}, \code{\link{smooth.terms}},
\code{\link{linear.functional.terms}}, \code{\link{s}},
\code{\link{te}}, \code{\link{predict.gam}},
\code{\link{plot.gam}}, \code{\link{summary.gam}}, \code{\link{gam.side}},
\code{\link{gam.selection}}, \code{\link{mgcv}}, \code{\link{gam.control}},
\code{\link{gam.check}}, \code{\link{negbin}}, \code{\link{magic}}, \code{\link{vis.gam}}
}
\examples{
library(mgcv)
## following is not *very* large, for obvious reasons...
dat <- gamSim(1,n=15000,dist="normal",scale=20)
bs <- "ps";k <- 20
b<-bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="REML")
summary(b)
plot(b,pages=1,rug=FALSE) ## plot smooths, but not rug
plot(b,pages=1,rug=FALSE,seWithMean=TRUE) ## `with intercept' CIs
ba<-bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="GCV.Cp") ## use GCV
summary(ba)
## A Poisson example...
dat <- gamSim(1,n=15000,dist="poisson",scale=.1)
b1<-bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method="ML",family=poisson())
b1
}
\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more ..
\concept{Varying coefficient model}
\concept{Functional linear model}
\concept{Penalized GLM}
\concept{Generalized Additive Model}
\concept{Penalized regression}
\concept{Spline smoothing}
\concept{Penalized regression spline}
\concept{Generalized Cross Validation}
\concept{Smoothing parameter selection}
\concept{tensor product smoothing}
\concept{thin plate spline}
\concept{P-spline}
\concept{Generalized ridge regression}
......@@ -12,7 +12,7 @@ estimation of degree of penalization). Isotropic or scale invariant smooths of a
available as model terms, as are linear functionals of such smooths; confidence/credible intervals are readily
available for any quantity predicted using a fitted model; \code{gam} is extendable: users can add smooths.
Smooth terms are represented using penalized regression splines (or similar smoothers)
with smoothing parameters selected by GCV/UBRE/AIC/REML or by regression splines with
fixed degrees of freedom (mixtures of the two are permitted). Multi-dimensional smooths are
available using penalized thin plate regression splines (isotropic) or tensor product splines
......@@ -33,6 +33,8 @@ terms can have more than one argument, implying an isotropic smooth and (b) \cod
provided as an effective means for modelling smooth interactions of any
number of variables via scale invariant tensor product smooths. If you want
a clone of what S-PLUS provides use \link[gam]{gam} from package \code{gam}.
For very large datasets see \code{\link{bam}}; for mixed GAMs see \code{\link{gamm}}.
}
\usage{
......@@ -435,7 +437,8 @@ plot(b5,pages=1)
dat <- gamSim(1,n=400,dist="binary",scale=.33)
lr.fit <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=binomial,data=dat)
lr.fit <- gam(y~s(x0)+s(x1)+s(x2)+s(x3),family=binomial,
data=dat,method="REML")
## plot model components with truth overlaid in red
op <- par(mfrow=c(2,2))
fn <- c("f0","f1","f2","f3");xn <- c("x0","x1","x2","x3")
......@@ -447,10 +450,26 @@ for (k in 1:4) {
}
par(op)
anova(lr.fit)
lr.fit1 <- gam(y~s(x0)+s(x1)+s(x2),family=binomial,data=dat)
lr.fit2 <- gam(y~s(x1)+s(x2),family=binomial,data=dat)
lr.fit1 <- gam(y~s(x0)+s(x1)+s(x2),family=binomial,
data=dat,method="REML")
lr.fit2 <- gam(y~s(x1)+s(x2),family=binomial,
data=dat,method="REML")
AIC(lr.fit,lr.fit1,lr.fit2)
## A Gamma example, by modifying `gamSim' output...
dat <- gamSim(1,n=400,dist="normal",scale=1)
dat$f <- dat$f/4 ## true linear predictor
Ey <- exp(dat$f);scale <- .5 ## mean and GLM scale parameter
## Note that `shape' and `scale' in `rgamma' are almost
## opposite terminology to that used with GLM/GAM...
dat$y <- rgamma(Ey*0,shape=1/scale,scale=Ey*scale)
bg <- gam(y~ s(x0)+ s(x1)+s(x2)+s(x3),family=Gamma(link=log),
data=dat,method="REML")
plot(bg,pages=1)
## For inverse Gaussian, see ?rig
## now a 2D smoothing example...
eg <- gamSim(2,n=500,scale=.1)
......
......@@ -26,7 +26,8 @@ gam.fit3(x, y, sp, Eb ,UrS=list(),
mustart = NULL, offset = rep(0, nobs), U1 = diag(ncol(x)),
Mp = -1, family = gaussian(), control = gam.control(),
intercept = TRUE,deriv=2,gamma=1,scale=1,
printWarn=TRUE,scoreType="REML",null.coef=rep(0,ncol(x)),...)
printWarn=TRUE,scoreType="REML",null.coef=rep(0,ncol(x)),
pearson.extra=0,dev.extra=0,n.true=-1,...)
}
%- maybe also `usage' for other objects documented here.
\arguments{
......@@ -80,6 +81,15 @@ fitted model, rather than an intermediate used in optimization.}
\item{null.coef}{coefficients for a model which gives some sort of upper bound on deviance.
This allows immediate divergence problems to be controlled.}
\item{pearson.extra}{Extra component to add to the numerator of the Pearson statistic
in P-REML/P-ML smoothness selection criteria.}
\item{dev.extra}{Extra component to add to deviance for REML/ML type smoothness selection criteria.}
\item{n.true}{Number of data to assume in smoothness selection criteria. <=0 indicates that it should be the
number of rows of \code{X}.}
\item{...}{Other arguments: ignored.}
}
\details{ This routine is basically \code{\link{glm.fit}} with some
......
......@@ -207,9 +207,13 @@ log smoothing parameters that actually multiply the individual quadratic penalti
\code{sp} is a vector of (underlying) smoothing parameter values: positive values are taken as fixed, negative to signal that
the smoothing parameter should be estimated. Taken as all negative if not supplied.
An obvious application of \code{paraPen} is to incorporate random effects, and an example of this is provided below. In this, divide the scale parameter by
the estimated smoothing parameters to recover variance component estimates.
An obvious application of \code{paraPen} is to incorporate random effects, and an example of this is provided below. In this
case the supplied penalty matrices will be (generalized) inverse covariance matrices for the random effects --- i.e. precision
matrices. The final estimate of the covariance matrix corresponding to one of these penalties is given by the (generalized)
inverse of the penalty matrix multiplied by the estimated scale parameter and divided by the estimated
smoothing parameter for the penalty. For example, if you use an identity matrix to penalize some coefficients that are to be viewed as i.i.d.
Gaussian random effects, then their estimated variance will be the estimated scale parameter divided by the estimate of the
smoothing parameter, for this penalty. See the `rail' example below.
}
......@@ -281,8 +285,6 @@ sig.b <- sqrt(rm$sig2/rm$sp[1]);sig.b
## Simple comparison with lme, using Rail data.
## "ML" used as "REML" treatments of residual variance
## differ slightly...
require(nlme)
b0 <- lme(travel~1,data=Rail,~1|Rail,method="ML")
Z <- model.matrix(~Rail-1,data=Rail,
......@@ -293,6 +295,15 @@ b0
(b$reml.scale/b$sp)^.5 ## `gam' ML estimate of Rail sd
b$reml.scale^.5 ## `gam' ML estimate of residual sd
b0 <- lme(travel~1,data=Rail,~1|Rail,method="REML")
Z <- model.matrix(~Rail-1,data=Rail,
contrasts.arg=list(Rail="contr.treatment"))
b <- gam(travel~Z,data=Rail,paraPen=list(Z=list(diag(6))),method="REML")
b0
(b$reml.scale/b$sp)^.5 ## `gam' REML estimate of Rail sd
b$reml.scale^.5 ## `gam' REML estimate of residual sd
}
\keyword{models} \keyword{regression}%-- one or more ..
......
......@@ -18,8 +18,10 @@ parameter is unknown or an Un-Biased Risk Estimator (UBRE) when it is known. UBR
scaled AIC (Generalized case) or Mallows' Cp (additive model case).
GCV and UBRE are covered in Craven and Wahba (1979) and Wahba (1990).
Alternatively REML or maximum likelihood (ML) may be used for smoothness
selection, by viewing the smooth components as random effects. The \code{method} argument
to \code{\link{gam}} selects the smoothness selection criterion.
selection, by viewing the smooth components as random effects (in this case the variance component for each
smooth random effect will be given by the scale parameter divided by the smoothing parameter --- for
smooths with multiple penalties, there will be multiple variance components).
The \code{method} argument to \code{\link{gam}} selects the smoothness selection criterion.
Automatic smoothness selection is unlikely to be successful with few data, particularly with
......
......@@ -52,6 +52,8 @@ cases, not at the MLE.}
\item{edf}{estimated degrees of freedom for each model parameter. Penalization
means that many of these are less than 1.}
\item{edf1}{similar, but using alternative estimate of EDF. Useful for testing.}
\item{family}{family object specifying distribution and link used.}
......@@ -126,6 +128,10 @@ for smoothness estimation. }
\item{residuals}{the working residuals for the fitted model.}
\item{scale}{when present, the scale (as \code{sig2})}
\item{scale.estimated}{ \code{TRUE} if the scale parameter was estimated, \code{FALSE} otherwise.}
\item{sig2}{estimated or supplied variance/scale parameter.}
\item{smooth}{list of smooth objects, containing the basis information for each term in the
......
\name{ls.size}
\alias{ls.size}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Size of list elements}
\description{Produces a named array giving the size, in bytes, of the elements of a list.
}
\usage{
ls.size(x)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{x}{ A list.}
}
\value{
A numeric vector giving the size in bytes of each element of the list \code{x}. The elements of the vector have the
same names as the elements of the list. If \code{x} is not a list then its size in bytes is returned, un-named.
}
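
A rough equivalent can be put together from base R's \code{object.size} (a sketch only, under a
made-up name; this is not the package's implementation):

ls.size.sketch <- function(x) {
  if (!is.list(x)) return(as.numeric(object.size(x)))  ## not a list: single un-named size
  ## named vector of byte counts, one per list element...
  vapply(x, function(el) as.numeric(object.size(el)), numeric(1))
}

Applied to the list in the example below, such a sketch returns a named vector of byte counts.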
\references{
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
}
\examples{
library(mgcv)
b <- list(M=matrix(runif(100),10,10),quote=
"The world is ruled by idiots because only an idiot would want to rule the world.",
fam=binomial())
ls.size(b)
}
......@@ -5,8 +5,8 @@
GAMs with GCV/AIC/REML smoothness estimation and GAMMs by REML/PQL
}
\description{
\code{mgcv} provides functions for generalized additive modelling and
generalized additive mixed modelling. The term GAM is taken to include
\code{mgcv} provides functions for generalized additive modelling (\code{\link{gam}} and \code{\link{bam}}) and
generalized additive mixed modelling (\code{\link{gamm}}). The term GAM is taken to include
any GLM estimated by quadratically penalized (possibly quasi-) likelihood maximization.
Particular features of the package are facilities for automatic smoothness selection,
......@@ -50,13 +50,18 @@ individual smooth terms for equality to the zero function, using similar ideas.
approximations can be used for hypothesis testing based model comparison. See \code{\link{anova.gam}} and
\code{\link{summary.gam}} for more on hypothesis testing.
For large datasets (that is, large n) see \code{\link{bam}}, which is a version of \code{\link{gam}} with
a much reduced memory footprint.
The package also provides a generalized additive mixed modelling function,
\code{\link{gamm}}, based on a PQL approach and
\code{lme} from the \code{nlme} library. \code{gamm} is particularly useful
\code{lme} from the \code{nlme} library (for an \code{lme4} based version, see package \code{gamm4}).
\code{gamm} is particularly useful
for modelling correlated data (i.e. where a simple independence model for the
residual variation is inappropriate). In addition, low level routine \code{\link{magic}}
can fit models to data with a known correlation structure.
Some underlying GAM fitting methods are available as low level fitting
functions: see \code{\link{magic}} and \code{\link{mgcv}}. But there is little functionality
that cannot be more conveniently accessed via \code{\link{gam}}.
......
\name{rig}
\alias{rig}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Generate inverse Gaussian random deviates}
\description{Generates inverse Gaussian random deviates.
}
\usage{
rig(n,mean,scale)
}
%- maybe also `usage' for other objects documented here.
\arguments{
\item{n}{the number of deviates required. If this has length > 1 then the length is taken as the number of deviates required.}
\item{mean}{vector of mean values.}
\item{scale}{vector of scale parameter values (the reciprocal of the usual `lambda' parameter, see below).}
}
\value{
A vector of inverse Gaussian random deviates.
}
\details{ If x is the returned vector, then E(x) = \code{mean} while var(x) = \code{scale*mean^3}. For density and distribution functions
see the \code{statmod} package. The algorithm used is Algorithm 5.7 of Gentle (2003), based on Michael et al. (1976). Note that \code{scale}
here is the scale parameter in the GLM sense, which is the reciprocal of the usual `lambda' parameter.
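
The transformation-with-multiple-roots approach is simple enough to sketch in a few lines of R
(an illustration only, under the parameterisation used here with \code{scale = 1/lambda}; it is not
claimed to be the function's actual implementation):

rig.sketch <- function(n, mean, scale) {
  lambda <- 1/scale                          ## usual inverse Gaussian `lambda' parameter
  nu <- rnorm(n)^2                           ## chi-squared(1) deviates
  y <- mean + mean^2*nu/(2*lambda) -
       mean/(2*lambda)*sqrt(4*mean*lambda*nu + mean^2*nu^2)   ## smaller root
  u <- runif(n)
  ifelse(u <= mean/(mean + y), y, mean^2/y)  ## choose root with the appropriate probability
}

Sample means and variances from such a sketch should be close to \code{mean} and \code{scale*mean^3} respectively.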
}
\references{
Gentle, J.E. (2003) Random Number Generation and Monte Carlo Methods (2nd ed.) Springer.
Michael, J.R., W.R. Schucany & R.W. Haas (1976) Generating random variates using transformations
with multiple roots. The American Statistician 30, 88-90.
\url{http://www.maths.bath.ac.uk/~sw283/}
}
\author{ Simon N. Wood \email{simon.wood@r-project.org}
}
\examples{
set.seed(7)
## An inverse.gaussian GAM example, by modifying `gamSim' output...
dat <- gamSim(1,n=400,dist="normal",scale=1)
dat$f <- dat$f/4 ## true linear predictor
Ey <- exp(dat$f) ## mean of the inverse Gaussian response
## simulate inverse Gaussian response with GLM scale parameter 0.2...
dat$y <- rig(Ey,mean=Ey,scale=.2)
big <- gam(y~ s(x0)+ s(x1)+s(x2)+s(x3),family=inverse.gaussian(link=log),
data=dat,method="REML")
plot(big,pages=1)
gam.check(big)
summary(big)
}
......@@ -41,7 +41,8 @@ Residual degrees of freedom are taken as number of data minus model degrees of
freedom.
Let \eqn{ {\bf P}_i}{P_i} be the matrix
giving the parameters of the ith smooth when applied to the data (or pseudodata in the generalized case) and let \eqn{ {\bf X}}{X}
be the design matrix of the model. Then \eqn{ tr({\bf XP}_i )}{tr(XP_i)} is the edf for the ith term. Clearly this definition causes the edf's to add up properly!
be the design matrix of the model. Then \eqn{ tr({\bf XP}_i )}{tr(XP_i)} is the edf for the ith term. Clearly this
definition causes the edf's to add up properly!
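
For a fitted model \code{b} this amounts to summing the coefficient-wise EDFs over the coefficients
belonging to a term; a small illustrative snippet (assuming \code{b} is any fitted \code{gam} object,
whose smooth objects index their coefficients via \code{first.para} and \code{last.para}):

i <- 1                                      ## pick a smooth term
sm <- b$smooth[[i]]                         ## its smooth object
sum(b$edf[sm$first.para:sm$last.para])      ## EDF for that term (sum of its coefficients' EDFs)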
\code{print.summary.gam} tries to print various bits of summary information useful for term selection in a pretty way.
......@@ -72,10 +73,7 @@ EDF for the term. The statistic used is then
(this can be calculated efficiently without forming the pseudoinverse explicitly). \eqn{T}{T} is compared to a
chi-squared distribution with degrees of freedom given by the EDF for the term + \code{alpha} * (number of
estimated smoothing parameters), or \eqn{T}{T} is used as a component in an F ratio statistic if the
scale parameter has been estimated. There is some simulation evidence that the adjustment per
smoothing parameter improves p-values when using GCV or AIC type smoothing parameter selection (\code{alpha=0.5}
seems to be best). The default \code{alpha=0} seems to be appropriate when smoothing
parameters are selected by REML/ML/PQL. The non-integer rank truncated inverse is constructed to give an
scale parameter has been estimated. The non-integer rank truncated inverse is constructed to give an
approximation varying smoothly between the bounding integer rank approximations, while yielding test statistics with the
correct mean and variance.
......@@ -86,10 +84,7 @@ for \eqn{\bf f}{f} implied by the truncation used in the test statistic.
Note that the distributional approximations for the p-values start to break down below one effective degree of freedom, and
p-values are not reported below 0.5 degrees of freedom.
Also note that increasing the \code{gamma} parameter of \code{\link{gam}} tends to mean that \code{alpha} should be decreased,
to get close to uniform p-value distribution under the null.
For best p-value performance it seems best to use ML or REML smoothness selection.
In simulations the p-values have the best behaviour under ML smoothness selection, with REML coming second.
}
......
# Translation of R-mgcv.pot to German
# Copyright (C) 2005 The R Foundation
# Copyright (C) 2005-2009 The R Foundation
# This file is distributed under the same license as the mgcv package.
# Chris Leick <c.leick@vollbio.de>, 2009.
#
......@@ -8,10 +8,10 @@
#
msgid ""
msgstr ""
"Project-Id-Version: R 2.9.2 / mgcv 1.5-5\n"
"Project-Id-Version: R 2.10.0 / mgcv 1.5-5\n"
"Report-Msgid-Bugs-To: bugs@r-project.org\n"
"POT-Creation-Date: 2005-12-09 07:31\n"
"PO-Revision-Date: 2009-09-03 15:31+0200\n"
"PO-Revision-Date: 2009-10-08 16:16+0200\n"
"Last-Translator: Chris Leick <c.leick@vollbio.de>\n"
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
"MIME-Version: 1.0\n"
......
# Translation of mgcv.pot to German
# Copyright (C) 2005 The R Foundation
# Copyright (C) 2005-2009 The R Foundation
# This file is distributed under the same license as the mgcv package.
# Chris Leick <c.leick@vollbio.de>, 2009.
#
msgid ""
msgstr ""
"Project-Id-Version: R 2.9.2 / mgcv 1.5-5\n"
"Project-Id-Version: R 2.10.0 / mgcv 1.5-5\n"
"Report-Msgid-Bugs-To: bugs@R-project.org\n"
"POT-Creation-Date: 2005-12-09 07:31+0000\n"
"PO-Revision-Date: 2009-09-03 15:25+0200\n"
"PO-Revision-Date: 2009-10-08 16:16+0200\n"
"Last-Translator: Chris Leick <c.leick@vollbio.de>\n"
"Language-Team: German <debian-l10n-german@lists.debian.org>\n"
"MIME-Version: 1.0\n"
......