Commit 5447230a authored by Dirk Eddelbuettel

Import Upstream version 1.7-0

parent 91641428
Package: mgcv
Version: 1.6-2
Version: 1.7-0
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
@@ -12,6 +12,6 @@ Imports: graphics, stats, nlme, Matrix
Suggests: nlme (>= 3.1-64), splines, Matrix
LazyLoad: yes
License: GPL (>= 2)
Packaged: 2010-04-24 15:31:36 UTC; simon
Packaged: 2010-11-02 10:24:50 UTC; simon
Repository: CRAN
Date/Publication: 2010-04-27 19:09:58
Date/Publication: 2010-11-03 07:41:48
useDynLib(mgcv, .registration = TRUE, .fixes = "C_")
export(anova.gam, bam, bam.update, cSplineDes,
export(anova.gam, bam, bam.update, concurvity, cSplineDes,
exclude.too.far,extract.lme.cov, extract.lme.cov2,
formXtViX, full.score, formula.gam,fixDependence,fix.family.link,
fix.family.var, fix.family.ls, gam, gam2derivative,
fix.family.var, fix.family.ls, fix.family.qf,fix.family.rd,
gam, gam2derivative,
gam2objective,
gamm, gam.check, gam.control,gam.fit3,
gam.fit, gam.outer,gam.vcomp, gamSim , influence.gam,
interpret.gam,
in.out,interpret.gam,
gam.side,
get.var,ldTweedie,
initial.sp,logLik.gam,ls.size,
magic, magic.post.proc, mgcv, mgcv.control, model.matrix.gam,
mono.con, mroot, negbin, new.name,
notExp,notExp2,notLog,notLog2,pcls,null.space.dimension,
pdIdnot,pdTens,
pen.edf,pdIdnot,pdTens,
place.knots, plot.gam, print.anova.gam,
print.gam,print.summary.gam,predict.gam,
PredictMat,Predict.matrix,Predict.matrix2,
@@ -24,9 +25,11 @@ export(anova.gam, bam, bam.update, cSplineDes,
Predict.matrix.tensor.smooth,
Predict.matrix.tprs.smooth,
Predict.matrix.ts.smooth,
Predict.matrix.mrf.smooth,
Predict.matrix.pspline.smooth,
Predict.matrix.random.effect,
residuals.gam,rig, s,
qq.gam,
residuals.gam,rig,rTweedie, s,
smoothCon,smooth.construct,smooth.construct2,
smooth.construct.cc.smooth.spec,
smooth.construct.cp.smooth.spec,
@@ -37,9 +40,10 @@ export(anova.gam, bam, bam.update, cSplineDes,
smooth.construct.ts.smooth.spec,
smooth.construct.ps.smooth.spec,
smooth.construct.re.smooth.spec,
smooth.construct.mrf.smooth.spec,
smooth.construct.ad.smooth.spec,
summary.gam,sp.vcov,
te,tensor.prod.model.matrix,tensor.prod.penalties,
t2,te,tensor.prod.model.matrix,tensor.prod.penalties,
Tweedie,uniquecombs, vcov.gam, vis.gam)
importFrom(grDevices,cm.colors,gray,heat.colors,terrain.colors,topo.colors)
@@ -15,6 +15,18 @@ ls.size <- function(x) {
sz
}
rwMatrix <- function(stop,row,weight,X) {
## Routine to recombine the rows of a matrix X according to info in
## stop,row and weight. Consider the ith row of the output matrix
## ind <- 1:stop[i] if i==1 and ind <- (stop[i-1]+1):stop[i]
## otherwise. The ith output row is then X[row[ind],]*weight[ind]
if (is.matrix(X)) { n <- nrow(X);p<-ncol(X);ok <- TRUE} else { n<- length(X);p<-1;ok<-FALSE}
stop <- stop - 1;row <- row - 1 ## R indices -> C indices
oo <-.C(C_rwMatrix,as.integer(stop),as.integer(row),as.double(weight),X=as.double(X),as.integer(n),as.integer(p))
if (ok) return(matrix(oo$X,n,p)) else
return(oo$X)
}
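As a hedged illustration, a pure-R equivalent of the scheme that comment describes might look as follows (rwMatrixR is a hypothetical reference helper for matrix X only; the package itself calls the compiled C_rwMatrix):

rwMatrixR <- function(stop, row, weight, X) {
  ## output row i is the weighted sum of input rows row[ind],
  ## with ind running (stop[i-1]+1):stop[i] (1:stop[1] for i==1)
  out <- matrix(0, length(stop), ncol(X))
  prev <- 1
  for (i in seq_along(stop)) {
    ind <- prev:stop[i]
    out[i, ] <- colSums(X[row[ind], , drop = FALSE] * weight[ind])
    prev <- stop[i] + 1
  }
  out
}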
qr.update <- function(Xn,yn,R=matrix(0,0,ncol(Xn)),f=array(0,0),y.norm2=0)
## Let X = QR and f = Q'y. This routine updates f and R
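The rest of qr.update's body falls outside this hunk. As a hedged sketch of the update the comment describes (qr_update_sketch is a hypothetical name; it assumes R is the accumulated triangular factor, f the accumulated Q'y, and y.norm2 the running squared norm of the response):

qr_update_sketch <- function(Xn, yn, R = matrix(0, 0, ncol(Xn)),
                             f = numeric(0), y.norm2 = 0) {
  p <- ncol(Xn)
  y.norm2 <- y.norm2 + sum(yn^2)     ## running ||y||^2
  qrx <- qr(rbind(R, Xn))            ## re-triangularize [R; Xn]
  fn <- qr.qty(qrx, c(f, yn))[1:p]   ## updated Q'y, first p elements
  list(R = qr.R(qrx), f = fn, y.norm2 = y.norm2)
}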
@@ -243,43 +255,89 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
} ## end bgam.fit
bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method)
bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0)
## function that does big additive model fit in strictly additive case
{ n <- nrow(mf)
{ ## first perform the QR decomposition, blockwise....
n <- nrow(mf)
if (rho!=0) { ## AR1 error model
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
}
if (n>chunk.size) {
G$coefficients <- rep(0,ncol(G$X))
class(G) <- "gam"
ind <- 1:chunk.size
G$model <- mf[ind,]
X <- predict(G,type="lpmatrix")
y <- G$model[[gp$response]] - G$offset[ind]
w <- sqrt(G$w[ind])
qrx <- qr.update(w*X,w*y)
n.block <- n%/%chunk.size ## number of full sized blocks
stub <- n%%chunk.size ## size of end block
if (n.block>1) for (i in 2:n.block) {
ind <- ind + chunk.size
if (stub>0) n.block <- n.block + 1
# start <- 0:(n.block-1)*chunk.size + 1 ## block starts
# end <- start + chunk.size; ## block ends
# if (rho==0) end <- end - 1 ## otherwise most blocks go to 1 beyond block end
# end[n.block] <- n ## block ends
start <- 0:(n.block-1)*chunk.size ## block starts
end <- start + chunk.size; ## block ends
end[n.block] <- n
if (rho==0) start <- start + 1 ## otherwise most blocks go to 1 before block start
start[1] <- 1
qrx <- list(R=matrix(0,0,ncol(G$X)),f=array(0,0),y.norm2=0) ## initial empty qr object
for (i in 1:n.block) {
ind <- start[i]:end[i]
if (rho!=0) {
N <- end[i]-start[i]+1
## following are alternative form (harder to update)...
#row <- rep(1:N,rep(2,N))[-1]
#weight <- c(rep(c(ld,sd),N-1),ld)
#if (end[i]==n) weight[2*N-1] <- 1
#stop <- c(1:(N-1)*2,2*N-1)
row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
weight <- c(1,rep(c(sd,ld),N-1))
stop <- c(1,1:(N-1)*2+1)
}
G$model <- mf[ind,]
X <- predict(G,type="lpmatrix")
y <- G$model[[gp$response]] - G$offset[ind]
w <- sqrt(G$w[ind])
qrx <- qr.update(w*X,w*y,qrx$R,qrx$f,qrx$y.norm2)
X <- w*predict(G,type="lpmatrix")
y <- w*(G$model[[gp$response]] - G$offset[ind])
if (rho!=0) {
## Apply transform...
# if (end[i]==n) {
# X <- rwMatrix(stop,row,weight,X)
# y <- rwMatrix(stop,row,weight,y)
# } else {
# X <- rwMatrix(stop,row,weight,X)[-N,]
# y <- rwMatrix(stop,row,weight,y)[-N]
# }
if (end[i]==n) yX.last <- c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
if (i==1) {
X <- rwMatrix(stop,row,weight,X)
y <- rwMatrix(stop,row,weight,y)
} else {
X <- rwMatrix(stop,row,weight,X)[-1,]
y <- rwMatrix(stop,row,weight,y)[-1]
}
}
qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2)
gc()
}
if (stub>0) {
ind <- (n-stub+1):n
G$model <- mf[ind,]
X <- predict(G,type="lpmatrix")
y <- G$model[[gp$response]] - G$offset[ind]
w <- sqrt(G$w[ind])
qrx <- qr.update(w*X,w*y,qrx$R,qrx$f,qrx$y.norm2)
gc()
}
G$n <- n
G$y <- mf[[gp$response]]
} else { ## n <= chunk.size
qrx <- qr.update(sqrt(G$w)*G$X,sqrt(G$w)*G$y)
if (rho==0) qrx <- qr.update(sqrt(G$w)*G$X,sqrt(G$w)*G$y) else {
#row <- rep(1:n,rep(2,n))[-1]
#weight <- c(rep(c(ld,sd),n-1),1)
#stop <- c(1:(n-1)*2,2*n-1)
row <- c(1,rep(1:n,rep(2,n))[-c(1,2*n)])
weight <- c(1,rep(c(sd,ld),n-1))
stop <- c(1,1:(n-1)*2+1)
yX.last <- c(G$y[n],G$X[n,]) ## store final row, in case of update
X <- rwMatrix(stop,row,weight,sqrt(G$w)*G$X)
y <- rwMatrix(stop,row,weight,sqrt(G$w)*G$y)
qrx <- qr.update(X,y)
}
}
rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
@@ -302,6 +360,9 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method)
G$n.true <- n
object <- gam(G=G,method=method,gamma=gamma,scale=scale)
y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
if (rho!=0) { ## correct RE/ML score for AR1 transform
object$gcv.ubre <- object$gcv.ubre - (n-1)*log(ld)
}
}
if (method=="GCV.Cp") {
@@ -316,14 +377,19 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method)
object$rank <- fit$gcv.info$rank
object$Ve <- post$Ve
object$Vp <- post$Vb
object$sig2 <- object$scale <- fit$scale
object$sp <- fit$sp
object$sig2 <- object$scale <- fit$scale
object$sp <- fit$sp
class(object)<-c("gam")
} else {
}
G$smooth <- G$X <- NULL
object$AR1.rho <- rho
if (rho!=0) { ## need to store last model matrix row, to allow update
object$yX.last <- yX.last
}
object$gamma <- gamma;object$G <- G;object$qrx <- qrx ## to allow updating of the model
object
}
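A note on the ld/sd construction used above: with first-row weight 1, leading diagonal ld and sub-diagonal sd, the banded transform is a root of the inverse AR1 correlation matrix, and its determinant ld^(n-1) is what the (n-1)*log(ld) RE/ML correction accounts for. A small numerical check (illustrative only; rho and n are arbitrary):

rho <- 0.7; n <- 6
ld <- 1/sqrt(1 - rho^2); sd <- -rho*ld   ## as in bam.fit
L <- diag(c(1, rep(ld, n - 1)))          ## first row left untransformed
L[cbind(2:n, 1:(n - 1))] <- sd           ## sub-diagonal entries
C <- rho^abs(outer(1:n, 1:n, "-"))       ## AR1 correlation matrix
range(L %*% C %*% t(L) - diag(n))        ## ~ 0: L decorrelates AR1 errors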
@@ -333,7 +399,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method)
bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action=na.omit,
offset=NULL,method="REML",control=gam.control(),scale=0,gamma=1,knots=NULL,
sp=NULL,min.sp=NULL,paraPen=NULL,chunk.size=10000,...)
sp=NULL,min.sp=NULL,paraPen=NULL,chunk.size=10000,rho=0,...)
## Routine to fit an additive model to a large dataset. The model is stated in the formula,
## which is then interpreted to figure out which bits relate to smooth terms and which to
@@ -356,7 +422,7 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
mf<-match.call(expand.dots=FALSE)
mf$formula<-gp$fake.formula
mf$method <- mf$family<-mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp <-
mf$gamma <- mf$paraPen<- mf$chunk.size <- mf$...<-NULL
mf$gamma <- mf$paraPen<- mf$chunk.size <- mf$rho <- mf$...<-NULL
mf$drop.unused.levels<-TRUE
mf[[1]]<-as.name("model.frame")
pmf <- mf
@@ -425,8 +491,9 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
if (control$trace) cat("Setup complete. Calling fit\n")
if (am) {
object <- bam.fit(G,mf,chunk.size,gp,scale,gamma,method)
object <- bam.fit(G,mf,chunk.size,gp,scale,gamma,method,rho=rho)
} else {
if (rho!=0) warning("AR1 parameter rho unused with generalized model")
object <- bgam.fit(G, mf, chunk.size, gp ,scale ,gamma,method=method,
control = control,...)
}
@@ -517,7 +584,7 @@ bam.update <- function(b,data,chunk.size=10000) {
}
b$model <- rbind(b$model,mf)
b$model <- rbind(b$model,mf) ## complete model frame --- old + new
## get response and offset...
@@ -536,7 +603,29 @@ bam.update <- function(b,data,chunk.size=10000) {
w <- sqrt(w)
b$qrx <- mgcv:::qr.update(w*X,w*y,b$qrx$R,b$qrx$f,b$qrx$y.norm2)
if (b$AR1.rho!=0) { ## original model had AR1 error structure...
rho <- b$AR1.rho
ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
sd <- -rho*ld ## sub diagonal
## append the final row of weighted X and y from original fit, first
wy <- c(b$yX.last[1],w*y)
wX <- rbind(b$yX.last[-1],w*X)
m <- nrow(wX)
b$yX.last <- c(wy[m],wX[m,])
row <- c(1,rep(1:m,rep(2,m))[-c(1,2*m)])
weight <- c(1,rep(c(sd,ld),m-1))
stop <- c(1,1:(m-1)*2+1)
## re-weight to independence....
wX <- rwMatrix(stop,row,weight,wX)[-1,]
wy <- rwMatrix(stop,row,weight,wy)[-1]
## update
b$qrx <- mgcv:::qr.update(wX,wy,b$qrx$R,b$qrx$f,b$qrx$y.norm2)
} else {
b$qrx <- mgcv:::qr.update(w*X,w*y,b$qrx$R,b$qrx$f,b$qrx$y.norm2)
}
## now do the refit...
rss.extra <- b$qrx$y.norm2 - sum(b$qrx$f^2)
@@ -565,7 +654,7 @@ bam.update <- function(b,data,chunk.size=10000) {
b$G$pearson.extra <- rss.extra
b$G$n.true <- n
if (b$scale.estimated) scale <- -1 else scale = b$sig2
in.out <- list(sp=b$sp,scale=b$gcv.ubre)
in.out <- list(sp=b$sp,scale=b$reml.scale)
object <- gam(G=b$G,method=method,gamma=b$gamma,scale=scale,in.out=in.out)
offset -> b$G$offset -> b$offset
w -> b$G$w -> b$weights -> b$prior.weights; n -> b$G$n
@@ -599,7 +688,9 @@ bam.update <- function(b,data,chunk.size=10000) {
b$Vp <- object$Vp
b$sig2 <- b$scale <- object$sig2
b$sp <- object$sp
if (b$AR1.rho!=0) { ## correct RE/ML score for AR1 transform
b$gcv.ubre <- b$gcv.ubre - (n-1)*log(ld)
}
}
b$G$X <- NULL
b$linear.predictors <- as.numeric(predict.gam(b,newdata=b$model,block.size=chunk.size))
@@ -2038,13 +2038,25 @@ negbin <- function (theta = stop("'theta' must be specified"), link = "log") {
n <- rep(1, nobs)
mustart <- y + (y == 0)/6
})
environment(dvar) <- environment(d2var) <- environment(variance) <- environment(validmu) <-
rd <- function(mu,wt,scale) {
Theta <- get(".Theta")
rnbinom(mu,size=Theta,mu=mu)
}
qf <- function(p,mu,wt,scale) {
Theta <- get(".Theta")
qnbinom(p,size=Theta,mu=mu)
}
environment(qf) <- environment(rd) <- environment(dvar) <- environment(d2var) <-
environment(variance) <- environment(validmu) <-
environment(ls) <- environment(dev.resids) <- environment(aic) <- environment(getTheta) <- env
famname <- paste("Negative Binomial(", format(round(theta,3)), ")", sep = "")
structure(list(family = famname, link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, variance = variance,dvar=dvar,d2var=d2var,d3var=d3var, dev.resids = dev.resids,
aic = aic, mu.eta = stats$mu.eta, initialize = initialize,ls=ls,
validmu = validmu, valideta = stats$valideta,getTheta = getTheta,canonical="log"), class = "family")
validmu = validmu, valideta = stats$valideta,getTheta = getTheta,qf=qf,rd=rd,canonical="log"), class = "family")
}
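Since the family now carries rd and qf, simulation-based residual checking works for negbin fits. A hedged example (simulated data; theta = 3 and rep = 50 are arbitrary choices):

library(mgcv)
set.seed(1)
dat <- gamSim(1, n = 400, dist = "poisson", scale = 0.25)
b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3),
         family = negbin(3), data = dat)
qq.gam(b, rep = 50)  ## reference band simulated using the family's rd slot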
@@ -2228,10 +2240,59 @@ Tweedie <- function(p=1,link=power(0)) {
scale <- dev/sum(wt)
-2*sum(ldTweedie(y,mu,p=power,phi=scale)[,1]*wt) + 2
}
if (p==2) {
rd <- function(mu,wt,scale) {
rgamma(mu,shape=1/scale,scale=mu*scale)
}
} else {
rd <- function(mu,wt,scale) {
rTweedie(mu,p=p,phi=scale)
}
}
structure(list(family = paste("Tweedie(",p,")",sep=""), variance = variance,
dev.resids = dev.resids,aic = aic, link = linktemp, linkfun = stats$linkfun, linkinv = stats$linkinv,
mu.eta = stats$mu.eta, initialize = initialize, validmu = validmu,
valideta = stats$valideta,dvar=dvar,d2var=d2var,d3var=d3var,ls=ls,canonical="none"), class = "family")
valideta = stats$valideta,dvar=dvar,d2var=d2var,d3var=d3var,ls=ls,rd=rd,canonical="none"), class = "family")
}
}
\ No newline at end of file
rTweedie <- function(mu,p=1.5,phi=1) {
## generate Tweedie random variables, with 1<p<2,
## adapted from rtweedie in the tweedie package
if (p<=1||p>=2) stop("p must be in (1,2)")
if (sum(mu<0)) stop("mean, mu, must be non-negative")
if (phi<=0) stop("scale parameter must be positive")
lambda <- mu^(2-p)/((2-p)*phi)
shape <- (2-p)/(p-1)
scale <- phi*(p-1)*mu^(p-1)
n.sim <- length(mu)
## how many Gamma r.v.s to sum up to get Tweedie
## 0 => none, and a zero value
N <- rpois(length(lambda),lambda)
## following is a vector of N[i] copies of each scale[i],
## concatenated one after the other
gs <- rep(scale,N)
## simulate gamma deviates to sum to get tweedie deviates
y <- rgamma(gs*0+1,shape=shape,scale=gs)
## create summation index...
lab <- rep(1:length(N),N)
## sum up each gamma sharing a label. 0 deviate if label does not occur
o <- .C(C_psum,y=as.double(rep(0,n.sim)),as.double(y),as.integer(lab),as.integer(length(lab)))
o$y
}
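A quick hedged usage example: for 1 < p < 2 the deviates are compound Poisson-gamma, so a positive fraction of exact zeros is expected.

set.seed(2)
mu <- exp(rnorm(1000))          ## arbitrary positive means
y <- rTweedie(mu, p = 1.5, phi = 1)
mean(y == 0)                    ## proportion of exact zeros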
@@ -366,10 +366,24 @@ gamm.setup<-function(formula,pterms,data=stop("No data supplied to gamm.setup"),
X <- G$X[,ind,drop=FALSE] # accumulate fixed effects into here
xlab <- rep("",0)
## code to deal with t2 smooths, by splitting up into single terms
if (G$m) {
sme <- expand.t2.smooths(G$smooth)
if (is.null(sme)) G$original.smooth <- NULL else {
G$original.smooth <- G$smooth
G$smooth <- sme ## G's smooth list is replaced by expanded version, until some time after model fitting is complete
rm(sme)
}
## G$m is always the length of G$smooth here...
G$m <- length(G$smooth)
}
if (G$m)
for (i in 1:G$m)
{ sm <- G$smooth[[i]]
sm$X <- G$X[,sm$first.para:sm$last.para]
sm$X <- G$X[,sm$first.para:sm$last.para,drop=FALSE]
if (inherits(sm,"tensor.smooth"))
{ if (sum(sm$fx)==length(sm$fx)) sm$fixed <- TRUE
else
@@ -480,7 +494,7 @@ gamm.setup<-function(formula,pterms,data=stop("No data supplied to gamm.setup"),
G$random<-random
G$X<-X ## fixed effects model matrix
G
}
@@ -952,7 +966,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
if (is.null(random)&&n.sr==0)
stop("gamm models must have at least 1 smooth with unknown smoothing parameter or at least one other random effect")
g<-as.factor(G$y*0+1) ## needed, whatever codetools says
g <- as.factor(rep(1,G$n)) ##as.factor(G$y*0+1) ## needed, whatever codetools says
offset.name <- attr(mf,"names")[attr(attr(mf,"terms"),"offset")]
@@ -1035,7 +1049,11 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
df.null=nrow(G$X),y=G$y,terms=gam.terms,pterms=pTerms,xlevels=G$xlevels,
contrasts=G$contrasts,assign=G$assign,na.action=attr(mf,"na.action"),
cmX=G$cmX,var.summary=G$var.summary,scale.estimated=TRUE)
# Transform parameters back to the original space....
#######################################################
## Transform parameters back to the original space....
#######################################################
bf<-as.numeric(ret$lme$coefficients$fixed)
br<-as.numeric(unlist(ret$lme$coefficients$random))
if (G$nsdf) p<-bf[1:G$nsdf] else p<-array(0,0)
@@ -1120,7 +1138,26 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
class(object)<-"gam"
##object$full.formula <- G$full.formula
object$fitted.values <- predict.gam(object,type="response")
## Restore original smooth list, if it was split to deal with t2 terms...
if (!is.null(G$original.smooth)) {
object$smooth <- G$smooth <- G$original.smooth
}
## If prediction parameterization differs from fit parameterization, transform now...
## (important for t2 smooths, where fit constraint is not good for component wise
## prediction s.e.s)
if (!is.null(G$P)) {
object$coefficients <- G$P %*% object$coefficients
object$Vp <- G$P %*% object$Vp %*% t(G$P)
object$Ve <- G$P %*% object$Ve %*% t(G$P)
}
# object$fitted.values <- predict.gam(object,type="response")
object$linear.predictors <- predict.gam(object,type="link")
object$fitted.values <- object$family$linkinv(object$linear.predictors)
object$residuals <- residuals(ret$lme) #as.numeric(G$y) - object$fitted.values
if (G$nsdf>0) term.names<-colnames(G$X)[1:G$nsdf] else term.names<-array("",0)
@@ -1133,7 +1170,9 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
k<-k+1
}
}
names(object$coefficients)<-term.names # note - won't work on matrices!!
names(object$coefficients) <- term.names # note - won't work on matrices!!
names(object$edf) <- term.names
names(object$sp) <- names(G$sp)
if (is.null(weights))
object$prior.weights <- object$y*0+1
else if (inherits(weights,"varFunc"))
@@ -1142,6 +1181,7 @@ gamm <- function(formula,random=NULL,correlation=NULL,family=gaussian(),data=lis
object$weights<-object$prior.weights
ret$gam<-object
ret
** denotes quite substantial/important changes
*** denotes really big changes
ISSUES:
1.7-0
** `t2' alternative tensor product smooths added. These can be used with
gamm4. (Several of the 1.7-0 additions are pulled together in the short
sketch at the end of this section.)
** "mrf" smooth class added (at the suggestion of Thomas Kneib).
Implements smoothing over discrete geographic districts using a
Markov random field penalty. See ?mrf
* qq.gam added to allow better checking of distribution of residuals.
* gam.check modified to use qq.gam for QQ plots of deviance residuals.
Also, it now works with gam(*, na.action = "na.replace") and NAs.
* `concurvity' function added to provide simple concurvity measures.
* plot.gam automatic layout modified to be a bit more sensible (i.e.
to recognise that most screens are landscape, and that usually
squarish plots are wanted).
* Plot method added for mrf smooths.
* in.out function added to test whether points are interior to
a region defined by a set of polygons. Useful when working with
MRFs.
* `plot.gam' restructured so that smooths are plotted by smooth specific
plot methods.
* Plot method added for "random.effect" smooth class.
* `pen.edf' function added to extract EDF associated with each penalty.
Useful with t2 smooths.
* Facility provided to allow different identifiability constraints to be
used for fitting and prediction. This allows t2 smooths to be fitted
with a constraint that permits fitting by gamm4, while still performing
inference with the componentwise optimal sum-to-zero constraints.
* mgcv-FAQ.Rd added.
* paraPen works properly with `gam.vcomp' and full.sp names returned
correctly.
* bam (and bam.update) can now employ an AR1 error model in the
Gaussian-identity case.
* bam.update modified for faster updates (an initial scale parameter
estimate is now supplied in the RE/ML case).
* Absorption of identifiability constraints modified so that
constraints affecting only some parameters leave the rest of the
parameters completely unchanged.
* rTweedie added for quick simulation of Tweedie random deviates
when 1<p<2.
* smooth.terms help file fixed so cyclic p spline identifies as "cp"
and not "cs"!
* bug fix in `gamm' so that a binomial response can be provided as a
2 column matrix, in the standard `glm' way.
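Several of the additions above can be tried together. The following sketch is illustrative rather than shipped example code: seeds and argument choices are arbitrary, and the mrf/in.out parts assume the newly added columb and columb.polys datasets as documented in their help files.

library(mgcv)
set.seed(0)
dat <- gamSim(1, n = 400, scale = 2)
## t2 tensor product smooth, per-penalty EDFs, concurvity, QQ checking...
b <- gam(y ~ t2(x0, x1) + s(x2), data = dat, method = "REML")
pen.edf(b)     ## EDF associated with each penalty (handy for t2 terms)
concurvity(b)  ## simple concurvity measures
qq.gam(b)      ## QQ plot of deviance residuals

## "mrf" smooth over discrete districts, using the new columb data...
data(columb); data(columb.polys)
bm <- gam(crime ~ s(district, bs = "mrf", xt = list(polys = columb.polys)),
          data = columb, method = "REML")
plot(bm)       ## the mrf plot method draws the district map

## in.out: which points lie inside one district's polygon?
bnd <- columb.polys[[2]]
pts <- cbind(runif(50, min(bnd[, 1], na.rm = TRUE), max(bnd[, 1], na.rm = TRUE)),
             runif(50, min(bnd[, 2], na.rm = TRUE), max(bnd[, 2], na.rm = TRUE)))
table(in.out(bnd, pts))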
1.6-2
@@ -16,7 +80,7 @@
could fail to fit properly, because of an indexing error (caused odd
results with indicator by variables)
* help files have had various misuses on `itemize' fixed.
* help files have had various misuses of `itemize' fixed.
* initialization could fail if response was actually a 1D array. fixed.
@@ -123,7 +187,7 @@
* There was a bug in the calculation of the Bayesian cov matrix, when the
scale parameter was known: it was always using an estimated scale
parameter. Makes no statistically meaningful difference for a model
that fits propoerly, of course.
that fits properly, of course.
* Some junk removed from gam object.
@@ -158,7 +222,6 @@
* `newdata.guaranteed' argument to predict.gam didn't work. fixed.
1.5-5
* `gamm.setup' made an assumption about basis dimensions which was not
@@ -211,7 +274,7 @@
possible to fit very poor models, and then have non-sensical effective
degrees of freedom reported. This has been fixed by using much more stable
expressions for edf calculation, when outer iteration smoothness
selection is empolyed. (Changes are to gam.fit3, gam.outer and a new
selection is employed. (Changes are to gam.fit3, gam.outer and a new
routine gam.fit3.post.proc).
* edfs in version 1.5-0 were calculated using newton, rather than fisher
File added
@@ -17,7 +17,7 @@ for large datasets.
bam(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,
na.action=na.omit, offset=NULL,method="REML",control=gam.control(),
scale=0,gamma=1,knots=NULL,sp=NULL,min.sp=NULL,paraPen=NULL,
chunk.size=10000,...)
chunk.size=10000,rho=0,...)
}
%- maybe also `usage' for other objects documented here.
@@ -103,6 +103,8 @@ if smooths share smoothing parameters).}
\item{chunk.size}{The model matrix is created in chunks of this size, rather than ever being formed whole.}
\item{rho}{An AR1 error model can be used for the residuals (based on dataframe order) of Gaussian-identity
link models. This is the AR1 correlation parameter.}
\item{...}{further arguments for
passing on e.g. to \code{gam.fit} (such as \code{mustart}). }
@@ -4,7 +4,7 @@
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Update a strictly additive bam model for new data.}
\description{ Strictly additive models fitted by \code{\link{bam}} can be efficiently updated as new data becomes available,
\description{ Gaussian with identity link models fitted by \code{\link{bam}} can be efficiently updated as new data becomes available,
by simply updating the QR decomposition on which estimation is based, and re-optimizing the smoothing parameters, starting
from the previous estimates. This routine implements this.
}
@@ -28,10 +28,13 @@ An object of class \code{"gam"} as described in \code{\link{gamObject}}.
\details{ \code{bam.update} updates the QR decomposition of the (weighted) model matrix of the GAM represented by \code{b} to take
account of the new data. The orthogonal factor multiplied by the response vector is also updated. Given these updates the model
and smoothing parameters can be re-estimated, as if the whole dataset (original and the new data) had been fitted in one go.
and smoothing parameters can be re-estimated, as if the whole dataset (original and the new data) had been fitted in one go. The
function will use the same AR1 model for the residuals as that employed in the original model fit (see \code{rho} parameter
of \code{\link{bam}}).
Note that there may be small numerical differences in fit between fitting the data all at once, and fitting in stages by updating, if the
smoothing bases used have any of their detials set with reference to the data (e.g. default knot locations).
Note that there may be small numerical differences in fit between fitting the data all at once, and fitting in
stages by updating, if the smoothing bases used have any of their details set with reference
to the data (e.g. default knot locations).
}
@@ -54,11 +57,11 @@ smoothing bases used have any of their detials set with reference to the data (e.
library(mgcv)
## following is not *very* large, for obvious reasons...
set.seed(8)
dat <- gamSim(1,n=5000,dist="normal",scale=5)
n <- 5000
dat <- gamSim(1,n=n,dist="normal",scale=5)
dat[c(50,13,3000,3005,3100),]<- NA
dat1 <- dat[4001:5000,]
dat0 <- dat[1:4000,]
dat1 <- dat[(n-999):n,]
dat0 <- dat[1:(n-1000),]
bs <- "ps";k <- 20
method <- "GCV.Cp"
b <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
@@ -72,6 +75,22 @@ b3 <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat,method=method)
b1;b2;b3
## example with AR1 errors...
e <- rnorm(n)
for (i in 2:n) e[i] <- e[i-1]*.7 + e[i]
dat$y <- dat$f + e*3
dat[c(50,13,3000,3005,3100),]<- NA
dat1 <- dat[(n-999):n,]
dat0 <- dat[1:(n-1000),]
method <- "ML"
b <- bam(y ~ s(x0,bs=bs,k=k)+s(x1,bs=bs,k=k)+s(x2,bs=bs,k=k)+
s(x3,bs=bs,k=k),data=dat0,method=method,rho=0.7)
b1 <- bam.update(b,dat1)
summary(b1);summary(b2);summary(b3)
}
\name{columb}
\alias{columb}
\alias{columb.polys}