Commit 2b1bc647 authored by Dirk Eddelbuettel's avatar Dirk Eddelbuettel

Import Upstream version 1.8-16

parent b90a627e
......@@ -5,6 +5,53 @@ Currently deprecated and liable to be removed:
- single penalty tensor product smooths.
- p.type!=0 in summary.gam.
1.8-16
* slightly improved initial value heuristics for overlapping penalties in
general family case.
* 'ocat' checks that response is numeric.
* plot.gam(...,scale=-1) now changes scale according to 'trans' and 'shift'.
* newton optimizer made slightly more cautious: contracts step if reduction
in true objective too far different from reduction predicted by quadratic
approximation underlying Newton step. Also leaves parameters unchanged
in Newton step while their grad is less than 1% of max grad.
* Fix to Fisher weight computation in gam.fit4. Previously a weight could
(rarely) evaluate as negative machine prec instead of zero, get passed to
gdi2 in C code, generate a NaN when square rooted, resulting in a NaN passed
to the LAPACK dgeqp3 routine, which then hung in a non-interruptible way.
* Fix of 'sp' argument handling with multiple formulae. Allocation to terms
could be incorrect.
* Option 'edge.correct' added to 'gam.control' to allow better correction
of edge of smoothing parameter space effects with 'gam' when RE/ML used.
* Fix to setting of penalty rank in smooth.construct.mrf.smooth.spec.
Previously this was wrong, which could cause failure with gamm if the
penalty was rank deficient. Thanks Paul Buerkner.
* Fix to Vb.corr call from gam.fit3.post.proc to ensure that sp not
dropped (wrongly treated as scale estimate) when P-REML or P-ML used.
Could cause failure depending on BLAS. Thanks Matteo Fasiolo.
* Fix in gam.outer that caused failure with "efs" optimizer and fixed sps.
* Fix to `get.var' to drop matrix attributes of 1 column matrix variables.
* Extra argument added to `uniquecombs' to allow result to have same row
ordering regardless of input data ordering. Now used by smooth constructors
that subsample unique covariate values during basis setup to ensure
invariance to data re-ordering.
* Correction of scaling error in spherical correlation structure GP smooth.
* qf and rd functions for binomial family fixed for zero n case.
1.8-15
* Fix of survival function prediction in cox.ph family. Code used expression
......
Package: mgcv
Version: 1.8-15
Version: 1.8-16
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with GCV/AIC/REML Smoothness
......@@ -16,6 +16,6 @@ LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
NeedsCompilation: yes
Packaged: 2016-09-13 10:00:39 UTC; sw283
Packaged: 2016-11-07 10:22:00 UTC; sw283
Repository: CRAN
Date/Publication: 2016-09-14 18:51:00
Date/Publication: 2016-11-07 19:28:16
0d9b05a5c3954f23ca64d831c45daf89 *ChangeLog
992b7d6af50703c1753fe3dd458a9df3 *DESCRIPTION
a38e3e6c9f0ae98bdcf3d14127347fde *ChangeLog
e051228d6327e0b3d640616d4fa510c3 *DESCRIPTION
eb723b61539feef013de476e68b5c50a *GPL-2
595f7fa74dd678ae4be1f6968e174555 *NAMESPACE
41b51c0155f4cb0b9d746d896108c160 *R/bam.r
1aee82e17a9feb6166dbe107659029c9 *R/bam.r
87933a9e2845f871334d96f537ee11e9 *R/coxph.r
fdb1dd621eb177107bbfb5f5c11777b2 *R/efam.r
a25f5a55d9d8b4ada56ac0df3ec99a18 *R/efam.r
4b370253beb1eda19d389132ade30212 *R/fast-REML.r
dea8bcbb4668ad0a6dfe3420bebebf48 *R/gam.fit3.r
6f57e0f2cb348d07754b0e41a374e76c *R/gam.fit4.r
cb165a0a11de9686910fb27b0d6450ec *R/gam.fit3.r
7734ba62022328a50b771e721df98a72 *R/gam.fit4.r
1b620b840ca67c9759639bd6e53824e3 *R/gam.sim.r
66257913c556f135657b7c12b6ed733d *R/gamlss.r
cceac26b3d01513f8f87eacae91ddae0 *R/gamm.r
4a0ce642d904d7f871b5b766c04a2af4 *R/jagam.r
71d23c1ee219ca4242d5f49ccd164970 *R/mgcv.r
10facb791e4cfd123d183f05660119c6 *R/jagam.r
8cebce4d68d4c45d661073756ca7b202 *R/mgcv.r
2feca0dc9d354de7bc707c67a5891364 *R/misc.r
03772998ab05887f2eeae10dd6efe983 *R/mvam.r
586b0b4447aeb73b28fac9bdfefd3e21 *R/plots.r
38a7e6b503af65a76ffe999ac66049bb *R/smooth.r
5d07d6e4f75a64b6d04cda9e23278d70 *R/plots.r
cf3543d1d4b7fe6fb8576f681b345722 *R/smooth.r
666d7fd36fda68b928993d5388b0d7fc *R/soap.r
76cc875719bf0ef9eab45ea5bfeccda6 *R/sparse.r
e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda
......@@ -58,9 +58,9 @@ e75719779e18c723ee1fd17e44e7901b *man/formXtViX.Rd
88888e966394c9f9792d9537341d053c *man/formula.gam.Rd
4da4d585b329769eb44f0c7a6e7dd554 *man/fs.test.Rd
6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd
912f575e1a6a7c9b1b94b2130fdfb38b *man/gam.Rd
0c075e9e58df199d59e77d94b712fde6 *man/gam.Rd
adaf0bd8e82d9472823cf3f3fa05e111 *man/gam.check.Rd
49de68e2abeb557b994032e4d7b5407a *man/gam.control.Rd
8931cd75ddec14d91ec94dec6ba69362 *man/gam.control.Rd
44db24b66ce63bc16d2c8bc3f5b42ac5 *man/gam.convergence.Rd
1cf5145859af2263f4e3459f40e1ab23 *man/gam.fit.Rd
4728be401da6eceb8b0c257377dc5d01 *man/gam.fit3.Rd
......@@ -97,8 +97,8 @@ df702cea24d0f92044a973b66a57e21f *man/missing.data.Rd
00ccf213c31910cd14f1df65a300eb33 *man/model.matrix.gam.Rd
2f2fdc722c5e9e58664da9378451cd4a *man/mono.con.Rd
d33914a328f645af13f5a42914ca0f35 *man/mroot.Rd
0748a44497317a19857f81bd76d162db *man/multinom.Rd
d70954045abda626a357951da5e2cbca *man/mvn.Rd
f624f3afcf4e2192e8f724d45257d983 *man/multinom.Rd
8fa6cf27db7192bbad6a2d41d2780937 *man/mvn.Rd
1064099913e539a75bf763c764bc72a1 *man/negbin.Rd
8a6a1926188511235f1e7406120c791e *man/new.name.Rd
00e39f302ab5efbe3b14265fffc16c18 *man/notExp.Rd
......@@ -114,7 +114,7 @@ ee9352ba4c531a8def16deddcab9a9fd *man/pdIdnot.Rd
b903ebcf31703db156e033fdfa527d73 *man/plot.gam.Rd
c27a6b886929b1dc83bf4b90cae848f9 *man/polys.plot.Rd
1a9d83c9fc67e5f0fc85d66d3112f4ef *man/predict.bam.Rd
2892714c395537c0cca29914989b1d50 *man/predict.gam.Rd
93f41380f769dff6a21394d80508c565 *man/predict.gam.Rd
cf14ce6cf8e4147f0f5c6e5b93b2af73 *man/print.gam.Rd
6d0ce4e574fabceffdbedd46c91364cb *man/qq.gam.Rd
22b7dcbc8ff4096365fa98ce56b957c9 *man/rTweedie.Rd
......@@ -122,7 +122,7 @@ fc1985e7dd5222182c4a8a939963b965 *man/random.effects.Rd
c523210ae95cb9aaa0aaa1c37da1a4c5 *man/residuals.gam.Rd
3c747a8066bcc28ae706ccf74f903d3e *man/rig.Rd
9f6f46f5c5da080bc82f9aa4685d364a *man/rmvn.Rd
845ec29324583d18c8dc150625e153e3 *man/s.Rd
c4be33830dfeb9e0dc766f8e5498931d *man/s.Rd
d515e51ec98d73af6166f7b31aeaba9b *man/scat.Rd
898e7cc2def2ee234475e68d0b904b29 *man/sdiag.Rd
8e968226c2b65ee89c8de2fd9869b086 *man/single.index.Rd
......@@ -149,11 +149,11 @@ b55a396da77559dac553613146633f97 *man/sp.vcov.Rd
83bd8e097711bf5bd0fff09822743d43 *man/spasm.construct.Rd
b9394812e5398ec95787c65c1325a027 *man/step.gam.Rd
f0791d830687d6155efb8a73db787401 *man/summary.gam.Rd
9ee8b9bd71f1b777ceb638fa21143cb9 *man/t2.Rd
36db3873e3e810ab6ee481f177d2535c *man/te.Rd
a0b0988dba55cca5b4b970e035e3c749 *man/t2.Rd
a27690f33b9a7bd56d9c1779c64896cc *man/te.Rd
6eebb6ef90374ee09453d6da6449ed79 *man/tensor.prod.model.matrix.Rd
f22f1cee0ff2b70628846d1d0f8e9a66 *man/trichol.Rd
c6c5fe7f6bfe07b63080248020dab331 *man/uniquecombs.Rd
94154ff18af819a7bb83919ee10db0de *man/uniquecombs.Rd
a16b3a5a4d13c705dcab8d1cd1b3347e *man/vcov.gam.Rd
281e73658c726997196727a99a4a1f9e *man/vis.gam.Rd
07a73758156dfa580c6e92edd34b0654 *man/ziP.Rd
......
......@@ -129,7 +129,7 @@ compress.df <- function(dat,m=NULL) {
names(dat0) <- names(dat)
dat <- dat0;rm(dat0)
}
xu <- uniquecombs(dat)
xu <- uniquecombs(dat,TRUE)
if (nrow(xu)>mm*mf) { ## too many unique rows to use only unique
for (i in 1:d) if (!is.factor(dat[,i])) { ## round the metric variables
xl <- range(dat[,i])
......@@ -138,7 +138,7 @@ compress.df <- function(dat,m=NULL) {
kx <- round((dat[,i]-xl[1])/dx)+1
dat[,i] <- xu[kx] ## rounding the metric variables
}
xu <- uniquecombs(dat)
xu <- uniquecombs(dat,TRUE)
}
k <- attr(xu,"index")
## shuffle rows in order to avoid induced dependencies between discretized
......
......@@ -450,6 +450,7 @@ ocat <- function(theta=NULL,link="identity",R=NULL) {
theta <- log(theta)
}
R3 <- length(G$family$getTheta())+2
if (!is.numeric(G$y)) stop("Response should be integer class labels")
if (R3>2&&G$family$n.theta>0) {
Theta <- ocat.ini(R3,G$y)
G$family$putTheta(Theta)
......
......@@ -849,9 +849,10 @@ Vb.corr <- function(X,L,lsp0,S,off,dw,w,rho,Vr,nth=0,scale.est=FALSE) {
## matrix.
## dw is derivative w.r.t. all the smoothing parameters and family parameters as if these
## were not linked, but not the scale parameter, of course. Vr includes scale uncertainty,
## if scale extimated...
## if scale estimated...
## nth is the number of initial elements of rho that are not smoothing
## parameters, scale.est is TRUE is scale estimated
## parameters, scale.est is TRUE if scale estimated by REML and must be
## dropped from s.p.s
M <- length(off) ## number of penalty terms
if (scale.est) {
## drop scale param from L, rho and Vr...
......@@ -952,43 +953,56 @@ gam.fit3.post.proc <- function(X,L,lsp0,S,off,object) {
qrx <- pqr(sqrt(object$weights)*X,object$control$nthreads)
R <- pqr.R(qrx);R[,qrx$pivot] <- R
if (!is.na(object$reml.scale)&&!is.null(object$db.drho)) { ## compute sp uncertainty correction
M <- ncol(object$db.drho)
## transform to derivs w.r.t. working, noting that an extra final row of L
## may be present, relating to scale parameter (for which db.drho is 0 since it's a scale parameter)
if (!is.null(L)) {
object$db.drho <- object$db.drho%*%L[1:M,,drop=FALSE]
M <- ncol(object$db.drho)
}
## extract cov matrix for log smoothing parameters...
ev <- eigen(object$outer.info$hess,symmetric=TRUE)
d <- ev$values;ind <- d <= 0
d[ind] <- 0;d[!ind] <- 1/sqrt(d[!ind])
rV <- (d*t(ev$vectors))[,1:M] ## root of cov matrix
Vc <- crossprod(rV%*%t(object$db.drho))
## set a prior precision on the smoothing parameters, but don't use it to
## fit, only to regularize Cov matrix. exp(4*var^.5) gives approx
## multiplicative range. e.g. var = 5.3 says parameter between .01 and 100 times
## estimate. Avoids nonsense at `infinite' smoothing parameters.
# dpv <- rep(0,ncol(object$outer.info$hess))
# dpv[1:M] <- 1/10 ## prior precision (1/var) on log smoothing parameters
# Vr <- chol2inv(chol(object$outer.info$hess + diag(dpv,ncol=length(dpv))))[1:M,1:M]
# Vc <- object$db.drho%*%Vr%*%t(object$db.drho)
d <- ev$values; d[ind] <- 0;d <- 1/sqrt(d+1/10)
Vr <- crossprod(d*t(ev$vectors))
#Vc2 <- scale*Vb.corr(X,L,S,off,object$dw.drho,object$working.weights,log(object$sp),Vr)
## Note that db.drho and dw.drho are derivatives w.r.t. full set of smoothing
## parameters excluding any scale parameter, but Vr includes info for scale parameter
## if it has been estiamted.
nth <- if (is.null(object$family$n.theta)) 0 else object$family$n.theta ## any parameters of family itself
Vc2 <- scale*Vb.corr(R,L,lsp0,S,off,object$dw.drho,w=NULL,log(object$sp),Vr,nth,object$scale.estimated)
hess <- object$outer.info$hess
edge.correct <- if (is.null(attr(hess,"edge.correct"))) FALSE else TRUE
K <- if (edge.correct) 2 else 1
for (k in 1:K) {
if (k==1) { ## fitted model computations
db.drho <- object$db.drho
dw.drho <- object$dw.drho
lsp <- log(object$sp)
} else { ## edge corrected model computations
db.drho <- attr(hess,"db.drho1")
dw.drho <- attr(hess,"dw.drho1")
lsp <- attr(hess,"lsp1")
hess <- attr(hess,"hess1")
}
M <- ncol(db.drho)
## transform to derivs w.r.t. working, noting that an extra final row of L
## may be present, relating to scale parameter (for which db.drho is 0 since it's a scale parameter)
if (!is.null(L)) {
db.drho <- db.drho%*%L[1:M,,drop=FALSE]
M <- ncol(db.drho)
}
## extract cov matrix for log smoothing parameters...
ev <- eigen(hess,symmetric=TRUE)
d <- ev$values;ind <- d <= 0
d[ind] <- 0;d[!ind] <- 1/sqrt(d[!ind])
rV <- (d*t(ev$vectors))[,1:M] ## root of cov matrix
Vc <- crossprod(rV%*%t(db.drho))
## set a prior precision on the smoothing parameters, but don't use it to
## fit, only to regularize Cov matrix. exp(4*var^.5) gives approx
## multiplicative range. e.g. var = 5.3 says parameter between .01 and 100 times
## estimate. Avoids nonsense at `infinite' smoothing parameters.
d <- ev$values; d[ind] <- 0;
d <- if (k==1) 1/sqrt(d+1/10) else 1/sqrt(d+1e-7)
Vr <- crossprod(d*t(ev$vectors))
## Note that db.drho and dw.drho are derivatives w.r.t. full set of smoothing
## parameters excluding any scale parameter, but Vr includes info for scale parameter
## if it has been estimated.
nth <- if (is.null(object$family$n.theta)) 0 else object$family$n.theta ## any parameters of family itself
drop.scale <- object$scale.estimated && !(object$method %in% c("P-REML","P-ML"))
Vc2 <- scale*Vb.corr(R,L,lsp0,S,off,dw.drho,w=NULL,lsp,Vr,nth,drop.scale)
Vc <- Vb + Vc + Vc2 ## Bayesian cov matrix with sp uncertainty
## finite sample size check on edf sanity...
edf2 <- rowSums(Vc*crossprod(R))/scale
if (sum(edf2)>sum(edf1)) {
#cat("\n edf2=",sum(edf2)," edf1=",sum(edf1));
edf2 <- edf1
}
Vc <- Vb + Vc + Vc2 ## Bayesian cov matrix with sp uncertainty
## finite sample size check on edf sanity...
if (k==1) { ## compute edf2 only with fitted model, not edge corrected
edf2 <- rowSums(Vc*crossprod(R))/scale
if (sum(edf2)>sum(edf1)) {
edf2 <- edf1
}
}
} ## k loop
} else edf2 <- Vc <- NULL
list(Vc=Vc,Vb=Vb,Ve=Ve,edf=edf,edf1=edf1,edf2=edf2,hat=hat,F=F,R=R)
} ## gam.fit3.post.proc
......@@ -1238,7 +1252,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
control,gamma,scale,conv.tol=1e-6,maxNstep=5,maxSstep=2,
maxHalf=30,printWarn=FALSE,scoreType="deviance",start=NULL,
mustart = NULL,null.coef=rep(0,ncol(X)),pearson.extra,
dev.extra=0,n.true=-1,Sl=NULL,...)
dev.extra=0,n.true=-1,Sl=NULL,edge.correct=FALSE,...)
## Newton optimizer for GAM reml/gcv/aic optimization that can cope with an
## indefinite Hessian. Main enhancements are:
## i) always perturbs the Hessian to +ve definite if indefinite
......@@ -1345,6 +1359,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
################################
## Start of Newton iteration....
################################
qerror.thresh <- .8 ## quadratic approx error to tolerate in a step
for (i in 1:200) {
if (control$trace) {
cat("\n",i,"newton max(|grad|) =",max(abs(grad)),"\n")
......@@ -1379,6 +1394,11 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
# printWarn=FALSE,mustart=mustart,
# scoreType=scoreType,eps=eps,null.coef=null.coef,...)
# }
## exclude dimensions from Newton step when the derivative is
## tiny relative to largest, as this space is likely to be poorly
## modelled on scale of Newton step...
uconv.ind <- uconv.ind & abs(grad)>max(abs(grad))*.001
## exclude apparently converged gradients from computation
hess1 <- hess[uconv.ind,uconv.ind]
......@@ -1403,9 +1423,15 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
Nstep[uconv.ind] <- -drop(U%*%(d*(t(U)%*%grad1))) # (modified) Newton direction
Sstep <- -grad/max(abs(grad)) # steepest descent direction
ms <- max(abs(Nstep)) ## note smaller permitted step if !pdef
mns <- maxNstep
# mns <- if (pdef) maxNstep else maxNstep/3 ## more cautious if not pdef
# if (!all(Nstep*Sstep >= 0)) mns <- mns/2 ## bit more cautious if directions differ
if (ms>maxNstep) Nstep <- mns * Nstep/ms
ms <- max(abs(Nstep))
if (ms>maxNstep) Nstep <- maxNstep * Nstep/ms
sd.unused <- TRUE ## steepest descent direction not yet tried
## try the step ...
if (sp.trace) cat(lsp,"\n")
......@@ -1429,8 +1455,9 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
mustart=mustart,scoreType=scoreType,null.coef=null.coef,
pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...)
sd.unused <- TRUE ## steepest descent direction not yet tried
## get the change predicted for this step according to the quadratic model
pred.change <- sum(grad*Nstep) + 0.5*t(Nstep) %*% hess %*% Nstep
if (reml) {
score1 <- b$REML
} else if (scoreType=="GACV") {
......@@ -1440,8 +1467,9 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
} else score1 <- b$GCV
## accept if improvement, else step halve
ii <- 0 ## step halving counter
##sc.extra <- 1e-4*sum(grad*Nstep) ## -ve sufficient decrease
if (is.finite(score1) && score1<score && pdef) { ## immediately accept step if it worked and positive definite
score.change <- score1 - score
qerror <- abs(pred.change-score.change)/(max(abs(pred.change),abs(score.change))+score.scale*conv.tol) ## quadratic approx error
if (is.finite(score1) && score.change<0 && pdef && qerror < qerror.thresh) { ## immediately accept step if it worked and positive definite
old.score <- score
mustart <- b$fitted.values
etastart <- b$linear.predictors
......@@ -1465,16 +1493,14 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
grad <- rho$rho1*grad
}
} else if (!is.finite(score1) || score1>=score) { ## initial step failed to improve score, try step halving ...
} else if (!is.finite(score1) || score1>=score||qerror >= qerror.thresh) { ## initial step failed, try step halving ...
step <- Nstep ## start with the (pseudo) Newton direction
##sc.extra <- 1e-4*sum(grad*step) ## -ve sufficient decrease
while ((!is.finite(score1) || score1>=score) && ii < maxHalf) {
while ((!is.finite(score1) || score1>=score ||qerror >= qerror.thresh) && ii < maxHalf) {
if (ii==3&&i<10) { ## Newton really not working - switch to SD, but keeping step length
s.length <- min(sum(step^2)^.5,maxSstep)
step <- Sstep*s.length/sum(Sstep^2)^.5 ## use steepest descent direction
sd.unused <- FALSE ## signal that SD already tried
} else step <- step/2
##if (ii>3) Slength <- Slength/2 ## keep track of SD step length
if (!is.null(lsp.max)) { ## need to take step in delta space
delta1 <- delta + step
lsp1 <- rt(delta1,lsp1.max)$rho ## transform to log sp space
......@@ -1485,7 +1511,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
printWarn=FALSE,start=start,mustart=mustart,scoreType=scoreType,
null.coef=null.coef,pearson.extra=pearson.extra,
dev.extra=dev.extra,n.true=n.true,Sl=Sl,...)
pred.change <- sum(grad*step) + 0.5*t(step) %*% hess %*% step ## Taylor prediction of change
if (reml) {
score1 <- b1$REML
} else if (scoreType=="GACV") {
......@@ -1493,8 +1519,9 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
} else if (scoreType=="UBRE") {
score1 <- b1$UBRE
} else score1 <- b1$GCV
##sc.extra <- 1e-4*sum(grad*Nstep) ## -ve sufficient decrease
if (is.finite(score1) && score1 < score) { ## accept
score.change <- score1 - score
qerror <- abs(pred.change-score.change)/(max(abs(pred.change),abs(score.change))+score.scale*conv.tol) ## quadratic approx error
if (is.finite(score1) && score.change < 0 && qerror < qerror.thresh) { ## accept
if (pdef||!sd.unused) { ## then accept and compute derivatives
b <- gam.fit3(x=X, y=y, sp=L%*%lsp1+lsp0,Eb=Eb,UrS=UrS,
offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=2,
......@@ -1524,13 +1551,13 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
}
} else { ## still need to try the steepest descent step to see if it does better
b <- b1
score2 <- score1
score2 <- score1 ## store temporarily and restore below
}
score1 <- score - abs(score) - 1 ## make sure that score1 < score
score1 <- score - abs(score) - 1 ## make sure that score1 < score (restore once out of loop)
} # end of if (score1<= score ) # accept
if (!is.finite(score1) || score1>=score) ii <- ii + 1
if (!is.finite(score1) || score1>=score || qerror >= qerror.thresh) ii <- ii + 1
} ## end while (score1>score && ii < maxHalf)
if (!pdef&&sd.unused&&ii<maxHalf) score1 <- score2
if (!pdef&&sd.unused&&ii<maxHalf) score1 <- score2 ## restored (not needed if termination on ii==maxHalf)
} ## end of step halving branch
## if the problem is not positive definite, and the sd direction has not
......@@ -1554,6 +1581,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
printWarn=FALSE,start=start,mustart=mustart,scoreType=scoreType,
null.coef=null.coef,pearson.extra=pearson.extra,
dev.extra=dev.extra,n.true=n.true,Sl=Sl,...)
pred.change <- sum(grad*step) + 0.5*t(step) %*% hess %*% step ## Taylor prediction of change
if (reml) {
score3 <- b1$REML
} else if (scoreType=="GACV") {
......@@ -1561,10 +1589,12 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
} else if (scoreType=="UBRE") {
score3 <- b1$UBRE
} else score3 <- b1$GCV
if (!is.finite(score2)||(is.finite(score3)&&score3<=score2)) { ## accept step - better than last try
score.change <- score3 - score
qerror <- abs(pred.change-score.change)/(max(abs(pred.change),abs(score.change))+score.scale*conv.tol) ## quadratic approx error
if (!is.finite(score2)||(is.finite(score3)&&score3<=score2&&qerror<qerror.thresh)) { ## record step - best SD step so far
score2 <- score3
lsp2 <- lsp3
## if (!is.null(lsp.max)) delta2 <- delta3
if (!is.null(lsp.max)) delta2 <- delta3
}
## stop when improvement found, and shorter step is worse...
if ((is.finite(score2)&&is.finite(score3)&&score2<score&&score3>score2)||kk==40) ok <- FALSE
......@@ -1574,7 +1604,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
if (is.finite(score2) && score2<score1) {
lsp1 <- lsp2
if (!is.null(lsp.max)) delta1 <- delta
if (!is.null(lsp.max)) delta1 <- delta2
score1 <- score2
}
......@@ -1634,7 +1664,53 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
warning("Iteration limit reached without full convergence - check carefully")
} else ct <- "full convergence"
b$dVkk <- NULL
list(score=score,lsp=lsp,lsp.full=L%*%lsp+lsp0,grad=grad,hess=hess,iter=i,conv =ct,score.hist = score.hist[!is.na(score.hist)],object=b)
if (as.logical(edge.correct)&&reml) {
## for those smoothing parameters that appear to be at working infinity
## reduce them until there is a detectable increase in RE/ML...
flat <- which(abs(grad2) < abs(grad)*100) ## candidates for reduction
REML <- b$REML
alpha <- if (is.logical(edge.correct)) .02 else abs(edge.correct) ## target RE/ML change per sp
b1 <- b; lsp1 <- lsp
if (length(flat)) for (i in flat) {
REML <- b1$REML + alpha
while (b1$REML < REML) {
lsp1[i] <- lsp1[i] - 1
b1 <- gam.fit3(x=X, y=y, sp=L%*%lsp1+lsp0,Eb=Eb,UrS=UrS,
offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=0,
control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start,
mustart=mustart,scoreType=scoreType,null.coef=null.coef,
pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...)
}
}
b1 <- gam.fit3(x=X, y=y, sp=L%*%lsp1+lsp0,Eb=Eb,UrS=UrS,
offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=2,
control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start,
mustart=mustart,scoreType=scoreType,null.coef=null.coef,
pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...)
score1 <- b1$REML;grad1 <- b1$REML1;hess1 <- b1$REML2
grad1 <- t(L)%*%grad1
hess1 <- t(L)%*%hess1%*%L
if (!is.null(lsp.max)) { ## need to transform to delta space
delta <- delta1
rho <- rt(delta,lsp1.max)
nr <- length(rho$rho1)
hess1 <- diag(rho$rho1,nr,nr)%*%hess1%*%diag(rho$rho1,nr,nr) + diag(rho$rho2*grad1)
grad1 <- rho$rho1*grad1
}
attr(hess,"edge.correct") <- TRUE
attr(hess,"hess1") <- hess1
attr(hess,"db.drho1") <- b1$db.drho
attr(hess,"dw.drho1") <- b1$dw.drho
attr(hess,"lsp1") <- lsp1
attr(hess,"rp") <- b1$rp
} ## if edge.correct
list(score=score,lsp=lsp,lsp.full=L%*%lsp+lsp0,grad=grad,hess=hess,iter=i,
conv =ct,score.hist = score.hist[!is.na(score.hist)],object=b)
} ## newton
......
......@@ -309,19 +309,9 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
else x %*% start)
}
else family$linkfun(mustart)
##mu.eta <- family$mu.eta
##Dd <- family$Dd
mu <- linkinv(eta);etaold <- eta
## need an initial `null deviance' to test for initial divergence...
## if (!is.null(start)) null.coef <- start - can be on edge of feasible - not good
#null.eta <- as.numeric(x%*%null.coef + as.numeric(offset))
#old.pdev <- sum(dev.resids(y, linkinv(null.eta), weights,theta)) + t(null.coef)%*%St%*%null.coef
coefold <- null.coef
conv <- boundary <- FALSE
......@@ -341,16 +331,7 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
good <- is.finite(w)&is.finite(wz)
z[!is.finite(z)] <- 0 ## avoid NaN in .C call - unused anyway
} else use.wy <- family$use.wz
#if (sum(!good)) {
# good1 <- is.finite(w)&good ## make sure w finite too
# w[!is.finite(w)] <- 0 ## clear infinite w
# w[!good1&w==0] <- max(w)*.Machine$double.eps^.5 ## reset zero value weights for problem elements
# dd$Deta.Deta2[!good] <- .5*dd$Deta[!good]/w[!good] ## reset problem elements to finite
# good <- is.finite(dd$Deta.Deta2) ## check in case Deta not finite, for example
#}
#z <- (eta-offset)[good] - dd$Deta.Deta2[good] ## - .5 * dd$Deta[good] / w
oo <- .C(C_pls_fit1,
y=as.double(z[good]),X=as.double(x[good,]),w=as.double(w[good]),wy = as.double(wz[good]),
E=as.double(Sr),Es=as.double(Eb),n=as.integer(sum(good)),
......@@ -364,7 +345,6 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
## index weights that are finite and positive
good <- is.finite(dd$Deta2)
good[good] <- dd$Deta2[good]>0
#w <- dd$Deta2*.5;
w[!good] <- 0
wz <- w*(eta-offset) - .5*dd$Deta
z <- (eta-offset) - dd$Deta.Deta2
......@@ -374,11 +354,7 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
good <- is.finite(w)&is.finite(wz)
z[!is.finite(z)] <- 0 ## avoid NaN in .C call - unused anyway
} else use.wy <- family$use.wz
#thresh <- max(w[good])*.Machine$double.eps^.5
#w[w < thresh] <- thresh
#good <- is.finite(dd$Deta)
#z <- (eta-offset)[good] - .5 * dd$Deta[good] / w[good]
oo <- .C(C_pls_fit1, ##C_pls_fit1,
y=as.double(z[good]),X=as.double(x[good,]),w=as.double(w[good]),wy = as.double(wz[good]),
E=as.double(Sr),Es=as.double(Eb),n=as.integer(sum(good)),
......@@ -485,7 +461,6 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
## drop same parameter every iteration!)
grad <- 2 * t(x[good,])%*%((w[good]*(x%*%start)[good]-wz[good]))+ 2*St%*%start
if (max(abs(grad)) > control$epsilon*max(abs(start+coefold))/2) {
## if (max(abs(start-coefold))>control$epsilon*max(abs(start+coefold))/2) {
old.pdev <- pdev ## not converged quite enough
coef <- coefold <- start
etaold <- eta
......@@ -516,32 +491,17 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
dd <- dDeta(y,mu,weights,theta,family,deriv)
w <- dd$Deta2 * .5
z <- (eta-offset) - dd$Deta.Deta2 ## - .5 * dd$Deta[good] / w
wf <- dd$EDeta2 * .5 ## Fisher type weights
wf <- pmax(0,dd$EDeta2 * .5) ## Fisher type weights
wz <- w*(eta-offset) - 0.5*dd$Deta ## Wz finite when w==0
gdi.type <- if (any(abs(w)<.Machine$double.xmin*1e20)||any(!is.finite(z))) 1 else 0
good <- is.finite(wz)&is.finite(w)
## exclude points for which gradient and second deriv are effectively zero and
## points with non finite second deriv or deriv ratio...
#min.Deta <- mean(abs(dd$Deta[is.finite(dd$Deta)]))*.Machine$double.eps*.001
#min.Deta2 <- mean(abs(dd$Deta2[is.finite(dd$Deta2)]))*.Machine$double.eps*.001
#good <- is.finite(dd$Deta.Deta2)&is.finite(w)&!(abs(dd$Deta2) < min.Deta2 & abs(dd$Deta) < min.Deta)
#if (control$trace&sum(!good)>0) cat("\n",sum(!good)," not good\n")
#w <- w[good]
#z <- (eta-offset)[good] - dd$Deta.Deta2[good] ## - .5 * dd$Deta[good] / w
#wf <- dd$EDeta2[good] * .5 ## Fisher type weights
#wz <- w*(eta-offset)[good] - 0.5*dd$Deta[good]
#residuals <- rep.int(NA, nobs)
residuals <- z - (eta - offset)
residuals[!is.finite(residuals)] <- NA
z[!is.finite(z)] <- 0 ## avoid passing NA etc to C code
ntot <- length(theta) + length(sp)
## if (deriv>1) n2d <- ntot*(1+ntot)/2 else n2d <- 0
rSncol <- unlist(lapply(UrS,ncol))
## Now drop any elements of dd that have been dropped in fitting...
if (sum(!good)>0) { ## drop !good from fields of dd, weights and pseudodata
......@@ -570,10 +530,7 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
}
}
}
## can't have zero weights in gdi2 call - superceded by type=1 handling of w==0
#mwb <- max(abs(w))*.Machine$double.eps
#mwa <- min(abs(w[w!=0]))*.0001; if (mwa==0) mwa <- mwb
#w[w==0] <- min(mwa,mwb);
oo <- .C(C_gdi2,
X=as.double(x[good,]),E=as.double(Sr),Es=as.double(Eb),rS=as.double(unlist(rS)),
U1 = as.double(U1),sp=as.double(exp(sp)),theta=as.double(theta),
......@@ -582,7 +539,7 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
Det2=as.double(dd$Deta2),Dth2=as.double(dd$Dth2),Det.th=as.double(dd$Detath),
Det2.th=as.double(dd$Deta2th),Det3=as.double(dd$Deta3),Det.th2 = as.double(dd$Detath2),
Det4 = as.double(dd$Deta4),Det3.th=as.double(dd$Deta3th), Deta2.th2=as.double(dd$Deta2th2),
beta=as.double(coef),b1=as.double(rep(0,ntot*ncol(x))),w1=rep(0,ntot*length(z)),
beta=as.double(coef),b1=as.double(rep(0,ntot*ncol(x))),w1=as.double(rep(0,ntot*length(z))),
D1=as.double(rep(0,ntot)),D2=as.double(rep(0,ntot^2)),
P=as.double(0),P1=as.double(rep(0,ntot)),P2 = as.double(rep(0,ntot^2)),
ldet=as.double(1-2*(scoreType=="ML")),ldet1 = as.double(rep(0,ntot)),
......@@ -1282,34 +1239,51 @@ gam.fit5.post.proc <- function(object,Sl,L,lsp0,S,off) {
lbb[ibd,ibd] <- lbbt
}
edge.correct <- FALSE
## compute the smoothing parameter uncertainty correction...
if (!is.null(object$outer.info$hess)&&!is.null(object$db.drho)) {
if (!is.null(L)) object$db.drho <- object$db.drho%*%L ## transform to derivs w.r.t. working
ev <- eigen(object$outer.info$hess,symmetric=TRUE)
d <- ev$values;ind <- d <= 0
d[ind] <- 0;d[!ind] <- 1/sqrt(d[!ind])
Vc <- crossprod((d*t(ev$vectors))%*%t(object$db.drho))
#dpv <- rep(0,ncol(object$outer.info$hess));M <- length(off)
#dpv[1:M] <- 1/100 ## prior precision (1/var) on log smoothing parameters
#Vr <- chol2inv(chol(object$outer.info$hess + diag(dpv,ncol=length(dpv))))[1:M,1:M]
#Vc <- object$db.drho%*%Vr%*%t(object$db.drho)
#dpv[1:M] <- 1/10 ## prior precision (1/var) on log smoothing parameters
#Vr <- chol2inv(chol(object$outer.info$hess + diag(dpv,ncol=length(dpv))))[1:M,1:M]
#M <- length(off)
d <- ev$values; d[ind] <- 0;
d <- d + 1/50 #d[1:M] <- d[1:M] + 1/50
d <- 1/sqrt(d)
Vr <- crossprod(d*t(ev$vectors))
#Vc2 <- Vb.corr(R,L,S,off,dw=NULL,w=NULL,log(object$sp),Vr)
Vc <- Vb + Vc #+ Vc2 ## Bayesian cov matrix with sp uncertainty
## reverse the various re-parameterizations...
} else Vc <- Vb
Vc <- Sl.repara(object$rp,Vc,inverse=TRUE)
Vc <- Sl.initial.repara(Sl,Vc,inverse=TRUE)
if (!is.null(object$outer.info$hess)&&!is.null(object$db.drho)) {
hess <- object$outer.info$hess
edge.correct <- if (is.null(attr(hess,"edge.correct"))) FALSE else TRUE
K <- if (edge.correct) 2 else 1
for (k in 1:K) {
if (k==1) { ## fitted model computations
db.drho <- object$db.drho
dw.drho <- object$dw.drho
lsp <- log(object$sp)
} else { ## edge corrected model computations
db.drho <- attr(hess,"db.drho1")
dw.drho <- attr(hess,"dw.drho1")
lsp <- attr(hess,"lsp1")
hess <- attr(hess,"hess1")
}
if (!is.null(L)) db.drho <- db.drho%*%L ## transform to derivs w.r.t. working
ev <- eigen(hess,symmetric=TRUE)
d <- ev$values;ind <- d <= 0
d[ind] <- 0;d[!ind] <- 1/sqrt(d[!ind])
Vc <- crossprod((d*t(ev$vectors))%*%t(db.drho)) ## first correction
d <- ev$values; d[ind] <- 0;
d <- if (k==1) 1/sqrt(d+1/50) else 1/sqrt(d+1e-7)
Vr <- crossprod(d*t(ev$vectors))
if (k==1) {
Vc1 <- Vc; Vr1 <- Vr; lsp1 <- lsp ## un-shifted version to use for edf
}
## reverse the various re-parameterizations...
}