Commit b955da99 authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 1.7-16

parent 7aa70fc3
Package: mgcv
Version: 1.7-13
Version: 1.7-16
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: GAMs with GCV/AIC/REML smoothness estimation and GAMMs by PQL
Title: Mixed GAM Computation Vehicle with GCV/AIC/REML smoothness
estimation
Description: Routines for GAMs and other generalized ridge regression
with multiple smoothing parameter selection by GCV, REML or
UBRE/AIC. Also GAMMs by REML or PQL. Includes a gam() function.
UBRE/AIC. Also GAMMs. Includes a gam() function.
Priority: recommended
Depends: R (>= 2.14.0), stats, graphics
Imports: nlme, methods, Matrix
......@@ -13,6 +14,6 @@ Suggests: nlme (>= 3.1-64), splines, Matrix, parallel
LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
Packaged: 2012-01-21 16:33:06 UTC; sw283
Packaged: 2012-04-30 07:09:06 UTC; sw283
Repository: CRAN
Date/Publication: 2012-01-22 09:59:42
Date/Publication: 2012-04-30 08:22:16
5d5d72ee6d284c96b4525e9eb748bc0f *DESCRIPTION
29e13076f8e7c500f10e2b64b0821984 *NAMESPACE
ecfb144fb5214dde68dffac22f219a1f *R/bam.r
9f562aa60504f1265daa8ff8095b6333 *DESCRIPTION
50152051f123a389d421aa3130dce252 *NAMESPACE
cf4210b25d2ece355a79cb8ed5e4455a *R/bam.r
f4f5c9bb8776c2248e088c9ee3517208 *R/fast-REML.r
b160632e8f38fa99470e2f8cba82a495 *R/gam.fit3.r
902657a0ee2dedc3fdfa501bf3b37c5b *R/gam.sim.r
e137c06cabb48551c18cf0cc3512d297 *R/gamm.r
c61836edb704dbd7b718c754d714a291 *R/mgcv.r
bf70158e37e33ea1136efdeab97569f8 *R/plots.r
d20083082ba7e1bf361ac7d404efd8a3 *R/smooth.r
fe9745f610246ee1f31eb915ca0d76a9 *R/sparse.r
76637934ae66a4b74a0637e698f71469 *changeLog
ad57e83090b4633ee50041fd3571c016 *R/gamm.r
a7d790a4fe2640fd69646e1dcf161d80 *R/mgcv.r
5c5f68e76697c356b95e084bee5d7776 *R/plots.r
bb2b4220a103364afc87249157a040b7 *R/smooth.r
fb66d6c18398411a99ffcb788b854f13 *R/sparse.r
020a4b9253d806cb55b0521412715dc7 *changeLog
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
40874e3ced720a596750f499ded8a60a *data/columb.rda
88d77139cc983317b6acd8c5f1252ab9 *gnugpl2.txt
......@@ -18,10 +19,10 @@ f693920e12f8a6f2b6cab93648628150 *index
c51c9b8c9c73f81895176ded39b91394 *man/Predict.matrix.cr.smooth.Rd
612ab6354541ebe38a242634d73b66ba *man/Tweedie.Rd
6d711de718a09e1e1ae2a6967abade33 *man/anova.gam.Rd
c4d1ad309698994e7c1c75f7db294a58 *man/bam.Rd
fa6e8f98dc01de508e95d1008ae84d60 *man/bam.Rd
b385d6d5419d0d6aefe03af1a79d5c4e *man/bam.update.Rd
4e925cb579f4693d1b8ec2d5092c0b37 *man/cSplineDes.Rd
9753a8051d9b495d9855537f3f26f491 *man/choose.k.Rd
9b4d616d1b6c4a46ca77d16cded3f806 *man/choose.k.Rd
c03748964ef606621418e428ae49b103 *man/columb.Rd
4196ba59f1fa8449c9cd0cab8a347978 *man/concurvity.Rd
f764fb7cb9e63ff341a0075a3854ab5d *man/exclude.too.far.Rd
......@@ -29,20 +30,20 @@ f764fb7cb9e63ff341a0075a3854ab5d *man/exclude.too.far.Rd
44ad0563add1c560027d502ce41483f5 *man/fix.family.link.Rd
75373268c1203ee110e1eede633752aa *man/fixDependence.Rd
9ac808f5a2a43cf97f24798c0922c9bf *man/formXtViX.Rd
34308f4ada8e2aca9981a17794dac30b *man/formula.gam.Rd
bb099e6320a6c1bd79fe4bf59e0fde08 *man/formula.gam.Rd
6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd
c7f0549fe7b9da0624417e33ed92344d *man/gam.Rd
aeb7ec80d75244bc4f2f2fd796f86efd *man/gam.check.Rd
847599e287ecf79fbb7be2cb06d72742 *man/gam.control.Rd
39b4fa3782cc33a445d34a9b26df44f8 *man/gam.Rd
69c6ef61a3cfc397cbeacd21e5e6cc9b *man/gam.check.Rd
96c9417e4ac5d79ec9ed3f363adfc4e9 *man/gam.control.Rd
fd98327327ba74bb1a61a6519f12e936 *man/gam.convergence.Rd
58ab3b3d6f4fd0d008d73c3c4e6d3305 *man/gam.fit.Rd
32b5cd1b6f63027150817077f3914cf4 *man/gam.fit3.Rd
21339a5d1eb8c83679dd9022ab682b5e *man/gam.fit3.Rd
dd35a8a851460c2d2106c03d544c8241 *man/gam.models.Rd
468d116a2ef9e60f683af48f4f100ef5 *man/gam.outer.Rd
7e5ba69a44bc937ddca04e4f153c7975 *man/gam.selection.Rd
e969287d1a5c281faa7eb6cfce31a7c5 *man/gam.outer.Rd
96676186808802344a99f9d3170bf775 *man/gam.selection.Rd
76651917bd61fc6bc447bbb40b887236 *man/gam.side.Rd
78588cf8ed0af8eca70bba3bbed64dbe *man/gam.vcomp.Rd
278e0b3aa7baa44dfb96e235ceb07f4c *man/gam2objective.Rd
a66a814cc4c6f806e824751fda519ae0 *man/gam2objective.Rd
4d5b3b1266edc31ce3b0e6be11ee9166 *man/gamObject.Rd
0ac5fb78c9db628ce554a8f68588058c *man/gamSim.Rd
6078c49c55f4e7ce20704e4fbe3bba8a *man/gamm.Rd
......@@ -55,30 +56,29 @@ aba56a0341ba9526a302e39d33aa9042 *man/interpret.gam.Rd
58e73ac26b93dc9d28bb27c8699e12cf *man/linear.functional.terms.Rd
5de18c3ad064a5bda4f9027d9455170a *man/logLik.gam.Rd
611f5f6acac9c5f40869c01cf7f75dd3 *man/ls.size.Rd
8ef61987727e1b857edf3a366d21b66c *man/magic.Rd
c4d7e46cead583732e391d680fecc572 *man/magic.Rd
496388445d8cde9b8e0c3917cbe7461d *man/magic.post.proc.Rd
5c55658a478bd34d66daad46e324d7f4 *man/mgcv-FAQ.Rd
904b19ba280010d85d59a4b21b6d2f94 *man/mgcv-package.Rd
196ad09f09d6a5a44078e2282eb0a56f *man/mgcv.Rd
bb420a39f1f8155f0084eb9260fad89c *man/mgcv.control.Rd
d564d1c5b2f780844ff10125348f2e2c *man/mgcv-FAQ.Rd
41df245a5821b3964db4c74b1930c0fe *man/mgcv-package.Rd
18a9858b6f3ffde288b0bf9e1a5da2f6 *man/model.matrix.gam.Rd
3edd2618dcb4b366eeb405d77f3f633c *man/mono.con.Rd
bc9b89db7e7ff246749551c16f5f1f07 *man/mono.con.Rd
3a4090ac778273861d97077681a55df2 *man/mroot.Rd
8aea04d0764d195409da798b33516051 *man/negbin.Rd
41de8762baab4fc0cf1224df168520fe *man/new.name.Rd
dffa2d51c704c610088fa02d7220b05e *man/notExp.Rd
150d7f8a427117353c5c2e466ff0bfae *man/notExp2.Rd
95b3e6686e9557b3278e21e350704ce9 *man/null.space.dimension.Rd
3720c8867aa31d7705dae102eeaa2364 *man/pcls.Rd
19939543d691f128e84d86fb5423541e *man/pcls.Rd
717d796acbaab64216564daf898b6d04 *man/pdIdnot.Rd
8c0f8575b427f30316b639a326193aeb *man/pdTens.Rd
b388d29148264fd3cd636391fde87a83 *man/pen.edf.Rd
de454d1dc268bda008ff46639a89acec *man/place.knots.Rd
ced71ada93376fcdffa28ad08009cf49 *man/plot.gam.Rd
84d54e8081b82cb8d96a33de03741843 *man/plot.gam.Rd
3d1484b6c3c2ea93efe41f6fc3801b8d *man/polys.plot.Rd
fdd6b7e03fde145e274699fe9ea8996c *man/predict.gam.Rd
afca36f5b1a5d06a7fcab2eaaa029e7e *man/predict.bam.Rd
df63d7045f83a1dc4874fcac18a2303c *man/predict.gam.Rd
a594eb641cae6ba0b83d094acf4a4f81 *man/print.gam.Rd
d837c87f037760c81906a51635476298 *man/qq.gam.Rd
5311a1e83ae93aef5f9ae38f7492536a *man/qq.gam.Rd
f77ca1471881d2f93c74864d076c0a0e *man/rTweedie.Rd
827743e1465089a859a877942ba2f4a9 *man/random.effects.Rd
37669f97e17507f3ae2d6d1d74feb9d7 *man/residuals.gam.Rd
......@@ -97,12 +97,12 @@ d202c6718fb1138fdd99e6102250aedf *man/smooth.construct.re.smooth.spec.Rd
8672633a1fad8df3cb1f53d7fa883620 *man/smooth.construct.tensor.smooth.spec.Rd
4b9bd43c3acbab6ab0159d59967e19db *man/smooth.construct.tp.smooth.spec.Rd
1de9c315702476fd405a85663bb32d1c *man/smooth.terms.Rd
6aa3bcbd3198d2bbc3b9ca12c9c9cd7e *man/smoothCon.Rd
0d12daea17e0b7aef8ab89b5f801adf1 *man/smoothCon.Rd
5ae47a140393009e3dba7557af175170 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
a17981f0fa2a6a50e637c98c672bfc45 *man/step.gam.Rd
700699103b50f40d17d3824e35522c85 *man/step.gam.Rd
dd54c87fb87c284d3894410f50550047 *man/summary.gam.Rd
22b571cbc0bd1e31f195ad927434c27e *man/t2.Rd
7f383eaaca246c8bf2d5b74d841f7f8a *man/t2.Rd
04076444b2c99e9287c080298f9dc1d7 *man/te.Rd
c3c23641875a293593fe4ef032b44aae *man/tensor.prod.model.matrix.Rd
fbd45cbb1931bdb5c0de044e22fdd028 *man/uniquecombs.Rd
......@@ -117,20 +117,18 @@ becbe3e1f1588f7292a74a97ef07a9ae *po/R-de.po
1a4a267ddcb87bb83f09c291d3e97523 *po/fr.po
813514ea4e046ecb4563eb3ae8aa202a *po/mgcv.pot
cd54024d76a9b53dc17ef26323fc053f *src/Makevars
a25e39145f032e8e37433651bba92ddf *src/gcv.c
2798411be2cb3748b8bd739f2d2016ee *src/gcv.h
d40012dcda1a10ee535a9b3de9b46c19 *src/gdi.c
94a2bcbb75cc60e8460e72ed154678c9 *src/gdi.c
49af97195accb65adc75620183d39a4c *src/general.h
da280ee5538a828afde0a4f6c7b8328a *src/init.c
8b37eb0db498a3867dc83364dc65f146 *src/magic.c
6f301e977834b4743728346184ea11ba *src/init.c
7f9fcb495707a003817e78f4802ceeba *src/magic.c
066af9db587e5fe6e5cc4ff8c09ae9c2 *src/mat.c
d21847ac9a1f91ee9446c70bd93a490a *src/matrix.c
54ce9309b17024ca524e279612a869d6 *src/matrix.h
08c94a2af4cd047ecd79871ecbafe33a *src/mgcv.c
99204b3b20c2e475d9e14022e0144804 *src/mgcv.h
2a1c4f1c10510a4338e5cc34defa65f6 *src/misc.c
de0ae24ea5cb533640a3ab57e0383595 *src/matrix.c
0f8448f67d16668f9027084a2d9a1b52 *src/matrix.h
6a9f57b44d2aab43aa32b01ccb26bd6e *src/mgcv.c
c62652f45ad1cd3624a849005858723a *src/mgcv.h
fcbe85d667f8c7818d17509a0c3c5935 *src/misc.c
7e0ba698a21a01150fda519661ef9857 *src/qp.c
cd563899be5b09897d1bf36a7889caa0 *src/qp.h
e9cab4a461eb8e086a0e4834cbf16f30 *src/sparse-smooth.c
3a251ecac78b25c315de459cd2ba0b04 *src/tprs.c
d0531330f4c1209a1cdd7a75b1854724 *src/tprs.h
985ef1e19c7b5d97b8e29ed78e709fc5 *src/tprs.c
5352d5d2298acd9b03ee1895933d4fb4 *src/tprs.h
......@@ -12,7 +12,7 @@ export(anova.gam, bam, bam.update, concurvity, cSplineDes,
gam.side,
get.var,ldTweedie,
initial.sp,logLik.gam,ls.size,
magic, magic.post.proc, mgcv, mgcv.control, model.matrix.gam,
magic, magic.post.proc, model.matrix.gam,
mono.con, mroot, negbin, new.name,
notExp,notExp2,notLog,notLog2,pcls,null.space.dimension,
pen.edf,pdIdnot,pdTens,
......
## routines for very large dataset generalized additive modelling.
## (c) Simon N. Wood 2009-2011
## (c) Simon N. Wood 2009-2012
ls.size <- function(x) {
......@@ -27,23 +27,53 @@ rwMatrix <- function(stop,row,weight,X) {
return(oo$X)
}
chol2qr <- function(XX,Xy) {
## takes X'X and X'y and returns R and f
## equivalent to qr update. Uses simple
## regularization if XX not +ve def.
XXeps <- norm(XX)*.Machine$double.eps^.9
n <- ncol(XX)
for (i in 0:20) {
ok <- TRUE
R <- try(chol(XX+diag(n)*XXeps*i),silent=TRUE) ## R'R = X'X
if (inherits(R,"try-error")) ok <- FALSE else {
f <- try(forwardsolve(t(R),Xy),silent=TRUE)
if (inherits(f,"try-error")) ok <- FALSE
}
if (ok) break; ## success
}
if (i==20 && !ok) stop("Choleski based method failed, switch to QR")
list(R=R,f=f)
}
qr.update <- function(Xn,yn,R=matrix(0,0,ncol(Xn)),f=array(0,0),y.norm2=0)
qr.update <- function(Xn,yn,R=NULL,f=rep(0,0),y.norm2=0,use.chol=FALSE)
## Let X = QR and f = Q'y. This routine updates f and R
## when Xn is appended to X and yn appended to y. If R has no rows
## then initial QR of Xn is performed. ||y||^2 is accumulated as well
{ #qrx <- qr(Xn)
p <- ncol(Xn)
#fn <- qr.qty(qrx,yn)[1:p]
## when Xn is appended to X and yn appended to y. If R is NULL
## then initial QR of Xn is performed. ||y||^2 is accumulated as well.
## if use.chol==TRUE then quicker but less stable accumulation of X'X and
## X'y are used. Results then need post processing, to get R =chol(X'X)
## and f= R^{-1} X'y.
{ p <- ncol(Xn)
y.norm2 <- y.norm2+sum(yn*yn)
if (nrow(R)) {
Xn <- rbind(R,Xn)
yn <- c(f,yn)
if (use.chol) {
if (is.null(R)) {
R <- crossprod(Xn)
fn <- as.numeric(t(Xn)%*%yn)
} else {
R <- R + crossprod(Xn)
fn <- f + as.numeric(t(Xn)%*%yn)
}
return(list(R=R,f=fn,y.norm2=y.norm2))
} else { ## QR update
if (!is.null(R)) {
Xn <- rbind(R,Xn)
yn <- c(f,yn)
}
qrx <- qr(Xn,tol=0,LAPACK=TRUE)
fn <- qr.qty(qrx,yn)[1:p]
rp <- qrx$pivot;rp[rp] <- 1:p # reverse pivot
return(list(R = qr.R(qrx)[,rp],f=fn,y.norm2=y.norm2))
}
qrx <- qr(Xn,tol=0)
fn <- qr.qty(qrx,yn)[1:p]
list(R = qr.R(qrx),f=fn,y.norm2=y.norm2)
}
......@@ -54,8 +84,8 @@ qr.up <- function(arg) {
dev <- 0
for (b in 1:arg$n.block) {
ind <- arg$start[b]:arg$stop[b]
arg$G$model <- arg$mf[ind,]
X <- predict(arg$G,type="lpmatrix")
##arg$G$model <- arg$mf[ind,]
X <- predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
if (is.null(arg$coef)) eta1 <- arg$eta[ind] else eta1 <- drop(X%*%arg$coef) + arg$offset[ind]
mu <- arg$linkinv(eta1)
y <- arg$G$y[ind] ## arg$G$model[[arg$response]]
......@@ -67,8 +97,8 @@ qr.up <- function(arg) {
dev <- dev + sum(arg$dev.resids(y,mu,weights))
wt <- c(wt,w)
w <- sqrt(w)
if (b == 1) qrx <- qr.update(w*X[good,],w*z)
else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2)
if (b == 1) qrx <- qr.update(w*X[good,],w*z,use.chol=arg$use.chol)
else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
rm(X);if(arg$gc.level>1) gc() ## X can be large: remove and reclaim
}
qrx$dev <- dev;qrx$wt <- wt
......@@ -110,9 +140,9 @@ mini.mf <-function(mf,chunk.size) {
mf0
}
bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NULL,
mustart = NULL, offset = rep(0, nobs),
control = gam.control(), intercept = TRUE, cl = NULL,gc.level=0)
bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etastart = NULL,
mustart = NULL, offset = rep(0, nobs), control = gam.control(), intercept = TRUE,
cl = NULL,gc.level=0,use.chol=FALSE,nobs.extra=0,samfrac=1)
{ y <- mf[[gp$response]]
weights <- G$w
conv <- FALSE
......@@ -162,7 +192,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
G$y <- y
G$w <- weights
## set up cluster for parallel coputation...
## set up cluster for parallel computation...
if (!is.null(cl)&&inherits(cl,"cluster")) {
n.threads <- length(cl)
......@@ -197,7 +227,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
arg[[i]] <- list(nobs= nt[i],start=start,stop=stop,n.block=n.block,
linkinv=linkinv,dev.resids=dev.resids,gc.level=gc.level,
mu.eta=mu.eta,variance=variance,mf = mf[ind,],
eta = eta[ind],offset = offset[ind],G = G)
eta = eta[ind],offset = offset[ind],G = G,use.chol=use.chol)
arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
arg[[i]]$G$y <- G$y[ind]
}
......@@ -221,7 +251,9 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
} ## single thread indices complete
conv <- FALSE
if (method=="fREML") Sl <- Sl.setup(G) ## setup block diagonal penalty object
for (iter in 1L:control$maxit) { ## main fitting loop
## accumulate the QR decomposition of the weighted model matrix
wt <- rep(0,0)
......@@ -231,9 +263,9 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
for (b in 1:n.block) {
ind <- start[b]:stop[b]
G$model <- mf[ind,]
X <- predict(G,type="lpmatrix")
if (iter>1) eta1 <- drop(X%*%coef) + offset[ind] else eta1 <- eta[ind]
##G$model <- mf[ind,]
X <- predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
if (is.null(coef)) eta1 <- eta[ind] else eta1 <- drop(X%*%coef) + offset[ind]
mu <- linkinv(eta1)
y <- G$y[ind] ## G$model[[gp$response]] ## - G$offset[ind]
weights <- G$w[ind]
......@@ -244,12 +276,17 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
dev <- dev + sum(dev.resids(y,mu,weights))
wt <- c(wt,w)
w <- sqrt(w)
if (b == 1) qrx <- qr.update(w*X[good,],w*z)
else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2)
if (b == 1) qrx <- qr.update(w*X[good,],w*z,use.chol=use.chol)
else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol)
rm(X);if(gc.level>1) gc() ## X can be large: remove and reclaim
}
if (use.chol) { ## post proc to get R and f...
y.norm2 <- qrx$y.norm2
qrx <- chol2qr(qrx$R,qrx$f)
qrx$y.norm2 <- y.norm2
}
} else { ## use new parallel accumulation
if (iter>1) for (i in 1:length(arg)) arg[[i]]$coef <- coef
for (i in 1:length(arg)) arg[[i]]$coef <- coef
res <- parLapply(cl,arg,qr.up)
## single thread debugging version
#res <- list()
......@@ -257,21 +294,40 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
# res[[i]] <- qr.up(arg[[i]])
#}
## now consolidate the results from the parallel threads...
R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
for (i in 2:n.threads) {
R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
wt <- c(wt,res[[i]]$wt); dev <- dev + res[[i]]$dev
y.norm2 <- y.norm2 + res[[i]]$y.norm2
}
qrx <- qr(R,tol=0)
f <- qr.qty(qrx,f)[1:ncol(R)]
qrx <- list(R=qr.R(qrx),f=f,y.norm2=y.norm2)
if (use.chol) {
R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
for (i in 2:n.threads) {
R <- R + res[[i]]$R; f <- f + res[[i]]$f
wt <- c(wt,res[[i]]$wt); dev <- dev + res[[i]]$dev
y.norm2 <- y.norm2 + res[[i]]$y.norm2
}
qrx <- chol2qr(R,f)
qrx$y.norm2 <- y.norm2
} else { ## proper QR
R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
for (i in 2:n.threads) {
R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
wt <- c(wt,res[[i]]$wt); dev <- dev + res[[i]]$dev
y.norm2 <- y.norm2 + res[[i]]$y.norm2
}
qrx <- qr(R,tol=0,LAPACK=TRUE)
f <- qr.qty(qrx,f)[1:ncol(R)]
rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
}
}
## if the routine has been called with only a random sample of the data, then
## R, f and ||y||^2 can be corrected to estimate the full versions...
qrx$R <- qrx$R/sqrt(samfrac)
qrx$f <- qrx$f/sqrt(samfrac)
qrx$y.norm2 <- qrx$y.norm2/samfrac
G$n <- nobs
#G$y <- mf[[gp$response]]
rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
if (control$trace)
......@@ -289,10 +345,40 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
if (method=="GCV.Cp") {
fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
H=G$H,C=G$C,gamma=gamma,scale=scale,gcv=(scale<=0),
extra.rss=rss.extra,n.score=G$n)
extra.rss=rss.extra,n.score=nobs+nobs.extra)
post <- magic.post.proc(qrx$R,fit,qrx$f*0+1)
} else { ## method is "REML" or "ML"
} else if (method=="fREML") { ## use fast REML code
## block diagonal penalty object, Sl, set up before loop
um <- Sl.Xprep(Sl,qrx$R)
lambda.0 <- initial.sp(qrx$R,G$S,G$off)
lsp0 <- log(lambda.0) ## initial s.p.
## carry forward scale estimate if possible...
if (scale>0) log.phi <- log(scale) else {
if (iter>1) log.phi <- log(object$scale) else {
if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(G$y)*.05) else
log.phi <- log(qrx$y.norm2/(nobs+nobs.extra))
}
}
fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
nobs =nobs+nobs.extra,Mp=um$Mp)
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=FALSE)
object <- list(coefficients=res$beta,full.sp = exp(fit$rho.full),
gcv.ubre=fit$reml,mgcv.conv=list(iter=fit$iter,
message=fit$conv),rank=ncol(um$X),
Ve=NULL,scale.estimated = scale<=0,outer.info=fit$outer.info,
optimizer=c("perf","newton"))
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
object$sp <- exp(fit$rho[-nsp])
} else { ## get sp's
object$sig2 <- object$scale <- scale
object$sp <- exp(fit$rho)
}
class(object)<-c("gam")
} else { ## method is one of "ML", "P-REML" etc...
y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
G$y <- qrx$f
G$w <- G$y*0+1
......@@ -301,7 +387,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
G$offset <- G$y*0
G$dev.extra <- rss.extra
G$pearson.extra <- rss.extra
G$n.true <- n
G$n.true <- nobs+nobs.extra
object <- gam(G=G,method=method,gamma=gamma,scale=scale)
y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
}
......@@ -332,7 +418,14 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
iter)
break
}
} ## fitting iteration
} ## end fitting iteration
if (method=="fREML") { ## do expensive cov matrix cal only at end
res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale)
object$edf <- res$edf
object$hat <- res$hat
object$Vp <- res$V
}
if (!conv)
warning("algorithm did not converge")
......@@ -347,13 +440,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
warning("fitted rates numerically 0 occurred")
}
# wtdmu <- if (intercept)
# sum(weights * y)/sum(weights)
# else linkinv(offset)
# nulldev <- sum(dev.resids(y, wtdmu, weights))
# if (n.threads!=1) stopCluster(cl)
object$iter <- iter
object$wt <- wt
object$y <- G$y
rm(G);if (gc.level>0) gc()
......@@ -363,8 +450,7 @@ bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NUL
bgam.fit2 <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NULL,
mustart = NULL, offset = rep(0, nobs),
control = gam.control(), intercept = TRUE)
mustart = NULL, offset = rep(0, nobs), control = gam.control(), intercept = TRUE)
## version using sparse full model matrix in place of QR update...
## not multi-threaded, due to anyway disappointing performance
{ G$y <- y <- mf[[gp$response]]
......@@ -518,7 +604,7 @@ bgam.fit2 <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NU
warning("fitted rates numerically 0 occurred")
}
object$iter <- iter
object$wt <- w
object$y <- G$y
rm(G);gc()
......@@ -526,13 +612,13 @@ bgam.fit2 <- function (G, mf, chunk.size, gp ,scale ,gamma,method, etastart = NU
} ## end bgam.fit2
ar.qr.up <- function(arg) {
## function to perform QR updating with AR reiduals, on one execution thread
## function to perform QR updating with AR residuals, on one execution thread
if (arg$rho!=0) { ## AR1 error model
ld <- 1/sqrt(1 - arg$rho^2) ## leading diagonal of root inverse correlation
sd <- -arg$rho * ld ## sub diagonal
}
yX.last <- NULL
qrx <- list(R=matrix(0,0,ncol(arg$G$X)),f=array(0,0),y.norm2=0) ## initial empty qr object
qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
for (i in 1:arg$n.block) {
ind <- arg$start[i]:arg$end[i]
if (arg$rho!=0) { ## have to find AR1 transform...
......@@ -543,10 +629,10 @@ ar.qr.up <- function(arg) {
weight <- c(1,rep(c(sd,ld),N-1))
stop <- c(1,1:(N-1)*2+1)
}
arg$G$model <- arg$mf[ind,]
## arg$G$model <- arg$mf[ind,]
w <- sqrt(arg$G$w[ind])
X <- w*predict(arg$G,type="lpmatrix")
y <- w*(arg$G$model[[arg$response]] - arg$offset[ind])
X <- w*predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
y <- w*(arg$mf[ind,arg$response] - arg$offset[ind]) ## w*(arg$G$model[[arg$response]] - arg$offset[ind])
if (arg$rho!=0) {
## Apply transform...
if (arg$last&&arg$end[i]==arg$nobs) yX.last <-
......@@ -559,7 +645,7 @@ ar.qr.up <- function(arg) {
y <- rwMatrix(stop,row,weight,y)[-1]
}
} ## dealt with AR1
qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2)
qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
rm(X);if (arg$gc.level>1) {gc()} ## X can be large: remove and reclaim
} ## all blocks dealt with
qrx$yX.last <- yX.last
......@@ -567,7 +653,84 @@ ar.qr.up <- function(arg) {
qrx
}
bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level=0)
pabapr <- function(arg) {
## function for parallel calling of predict.gam
## QUERY: ... handling?
predict.gam(arg$object,newdata=arg$newdata,type=arg$type,se.fit=arg$se.fit,terms=arg$terms,
block.size=arg$block.size,newdata.guaranteed=arg$newdata.guaranteed,
na.action=arg$na.action)
}
predict.bam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,
block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,
cluster=NULL,...) {
## function for prediction from a bam object, possibly in parallel
if (!is.null(cluster)&&inherits(cluster,"cluster")) {
require(parallel)
n.threads <- length(cluster)
} else n.threads <- 1
if (missing(newdata)) n <- nrow(object$model) else n <- nrow(newdata)
if (n < 100*n.threads) n.threads <- 1 ## not worth the overheads
if (n.threads==1) { ## single threaded call
if (missing(newdata)) return(
predict.gam(object,newdata=object$model,type=type,se.fit=se.fit,terms=terms,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action,...)
) else return(
predict.gam(object,newdata=newdata,type=type,se.fit=se.fit,terms=terms,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action,...))
} else { ## parallel call...
nt <- rep(floor(n/n.threads),n.threads)
nt[1] <- n - sum(nt[-1])
arg <- list()
n1 <- 0
for (i in 1:n.threads) {
n0 <- n1+1;n1 <- n1+nt[i]
ind <- n0:n1 ## this thread's data block from mf
arg[[i]] <- list(object=object,type=type,se.fit=se.fit,terms=terms,
block.size=block.size,newdata.guaranteed=newdata.guaranteed,
na.action=na.action)
arg[[i]]$object$model <- object$model[1:2,] ## save space
if (missing(newdata)) {
arg[[i]]$newdata <- object$model[ind,]
} else {
arg[[i]]$newdata <- newdata[ind,]
}
} ## finished setting up arguments
res <- parLapply(cluster,arg,pabapr) ## perform parallel prediction
## and splice results back together...
if (type=="lpmatrix") {
X <- res[[1]]
for (i in 2:length(res)) X <- rbind(X,res[[i]])
return(X)
} else if (se.fit==TRUE) {
rt <- list(fit = res[[1]]$fit,se.fit = res[[1]]$se.fit)
if (type=="terms") {
for (i in 2:length(res)) {
rt$fit <- rbind(rt$fit,res[[i]]$fit)
rt$se.fit <- rbind(rt$se.fit,res[[i]]$se.fit)
}
} else {
for (i in 2:length(res)) {
rt$fit <- c(rt$fit,res[[i]]$fit)
rt$se.fit <- c(rt$se.fit,res[[i]]$se.fit)
}
}
return(rt)
} else { ## no se's returned
rt <- res[[1]]
if (type=="terms") {
for (i in 2:length(res)) rt <- rbind(rt,res[[i]])
} else {
for (i in 2:length(res)) rt <- c(rt,res[[i]])
}
return(rt)
}
}
} ## end predict.bam
bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level=0,use.chol=FALSE)
## function that does big additive model fit in strictly additive case
{ ## first perform the QR decomposition, blockwise....
n <- nrow(mf)
......@@ -620,7 +783,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
arg[[i]] <- list(nobs= nt[i],start=start,end=end,n.block=n.block,
rho=rho,mf = mf[ind,],gc.level=gc.level,
offset = G$offset[ind],G = G,response=gp$response,
first=FALSE,last=FALSE)
first=FALSE,last=FALSE,use.chol=use.chol)
if (i==1) arg[[1]]$first <- TRUE
if (i==n.threads) arg[[i]]$last <- TRUE
arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
......@@ -637,7 +800,7 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
}
if (n.threads==1) { ## use original single thread method...
qrx <- list(R=matrix(0,0,ncol(G$X)),f=array(0,0),y.norm2=0) ## initial empty qr object
qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
for (i in 1:n.block) {
ind <- start[i]:end[i]
if (rho!=0) {
......@@ -647,10 +810,10 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
weight <- c(1,rep(c(sd,ld),N-1))
stop <- c(1,1:(N-1)*2+1)
}
G$model <- mf[ind,]
#G$model <- mf[ind,]
w <- sqrt(G$w[ind])
X <- w*predict(G,type="lpmatrix")
y <- w*(G$model[[gp$response]] - G$offset[ind])
X <- w*predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
y <- w*(mf[ind,gp$response]-G$offset[ind]) ## w*(G$model[[gp$response]] - G$offset[ind])
if (rho!=0) {
## Apply transform...
if (end[i]==n) yX.last <- c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
......@@ -663,10 +826,15 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
}
}
qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2)
qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol)
rm(X)
if (gc.level>1) {gc()} ## X can be large: remove and reclaim
} ## end of single thread block loop
if (use.chol) { ## post proc to get R and f...
y.norm2 <- qrx$y.norm2
qrx <- chol2qr(qrx$R,qrx$f)
qrx$y.norm2 <- y.norm2
}
} else { ## use parallel accumulation
res <- parLapply(cl,arg,ar.qr.up)
......@@ -680,28 +848,43 @@ bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,cl=NULL,gc.level
R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
y.norm2 <- res[[1]]$y.norm2
for (i in 2:n.threads) {
R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)