Commit 93d0e014 authored by Dirk Eddelbuettel

Updated version 1.8-24 from 'upstream/1.8-24'

with Debian dir 51ae6a722bf083fb633068238408c84b5a630b9b
parents 4b04f26f 9e061649
** denotes quite substantial/important changes
*** denotes really big changes
Liable to change in future release:
- method="GCV.Cp" as default in gam. Default will
change to "REML". (1.8-24, June 2018)
Currently deprecated and liable to be removed:
- gam performance iteration (1.8-19, Sept 2017)
- argument 'pers' in plot.gam (1.8-23, Nov 2018)
- argument 'pers' in plot.gam (1.8-23, Nov 2017)
Issues:
* t2 in bam(...,discrete=TRUE) - not treated as tensor products at
present, and reparameterization needs checking (also for bam).
1.8-24
* Extended Fellner-Schall optimizer now available for all families with 'gam'
  using gam(...,optimizer="efs") (see the sketch after this list).
* Change to default behaviour of plot.gam when 'seWithMean=TRUE', and of
  predict.gam when 'type="iterms"'. The extra uncertainty added to CIs or
  standard errors now reflects the uncertainty in the means of all other model
  terms, not just the uncertainty in the mean of the fixed effects as before.
  See ?plot.gam and ?predict.gam (including for how to get the old behaviour).
* 're' smooths can now accept matrix arguments: see ?linear.functional.terms.
* cox.ph now allows an offset to be provided.
* Fix in smoothCon for a bug in the case in which only a single coefficient is
  involved in a sum-to-zero constraint. Could cause failure in e.g. t2 with
  a cc marginal.
* Model terms s, te etc are now always evaluated in mgcv workspace explicitly
to avoid masking problems in obscure circumstances.
* 'mrf' smooth documentation modified to make it clearer how to specify 'nb',
  and code modified so that it is now possible to specify the neighbour
  structure using names rather than indices.
* 'bfgs' fix to handle extended families.
* plot.gam modified to only prompt (via devAskNewPage) for a new page after
the first page is used up.
* export 'k.check'.
* Fix to 'Rrank'. Previously a matrix R with more columns than rows could
cause a segfault.
* Fix to non-finite likelihood handling in gam.fit5.
* Fix in bgam.fitd to reduce the step length under an indefinite deviance, and
  to ensure that penalty evaluation cannot go negative through round-off.
* newton slightly modified to avoid a (small) chance of all sp's being dropped
  for an indefinite likelihood.
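
A minimal sketch of two of the 1.8-24 changes above (the 'efs' optimizer and the
revised 'seWithMean'/'iterms' uncertainty), using simulated data from mgcv's
gamSim; the data and model are hypothetical, not from this commit:

library(mgcv)
set.seed(1)
dat <- gamSim(1, n = 200, verbose = FALSE)  ## standard mgcv simulated test data
## extended Fellner-Schall optimization, now available for all families:
b <- gam(y ~ s(x0) + s(x1), data = dat, optimizer = "efs")
## CIs now account for uncertainty in the means of all other model terms:
plot(b, pages = 1, seWithMean = TRUE)
## the same change applies to standard errors from type="iterms":
pr <- predict(b, type = "iterms", se.fit = TRUE)
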
1.8-23
......
Package: mgcv
Version: 1.8-23
Version: 1.8-24
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with Automatic Smoothness
......@@ -18,6 +18,6 @@ LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
NeedsCompilation: yes
Packaged: 2018-01-08 15:15:43 UTC; sw283
Packaged: 2018-06-18 13:08:20 UTC; sw283
Repository: CRAN
Date/Publication: 2018-01-15 00:23:36 UTC
Date/Publication: 2018-06-18 19:07:59 UTC
......@@ -14,7 +14,7 @@ export(anova.gam, bam, bam.update,bandchol, betar, cox.ph,concurvity,
gaulss,gam.side,get.var,gevlss,
influence.gam,
in.out,inSide,interpret.gam,initial.sp,
jagam,ldetS,
jagam,k.check,ldetS,
ldTweedie,
logLik.gam,ls.size,
magic, magic.post.proc, model.matrix.gam,mini.roots,
......
......@@ -211,7 +211,7 @@ discrete.mf <- function(gp,mf,names.pmf,m=NULL,full=TRUE) {
## * nr records the number of unique discretized covariate values
## i.e. the number of rows before the padding starts
## * k.start contains the starting column in index vector k, for
## each variable.
## each variable. The final element is the column beyond the last one.
## * k is the index matrix. The ith record of the 1st column of the
## jth variable is in row k[i,k.start[j]] of the corresponding
## column of mf.
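## (So, presumably, columns k.start[j]:(k.start[j+1]-1) of k belong to
## variable j, the final k.start element being one beyond the last column.)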
......@@ -527,7 +527,7 @@ bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
for (iter in 1L:control$maxit) { ## main fitting loop
devold <- dev
dev <- 0
## accumulate the QR decomposition of the weighted model matrix
if (iter==1||!additive) {
qrx <- list()
......@@ -536,10 +536,13 @@ bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
eta <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + offset
lsp.full <- G$lsp0
if (n.sp>0) lsp.full <- lsp.full + if (is.null(G$L)) lsp[1:n.sp] else G$L %*% lsp[1:n.sp]
Sb <- Sl.Sb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
#Sb <- Sl.Sb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
rSb <- Sl.rSb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
if (iter>2) {
Sb0 <- Sl.Sb(Sl,lsp.full,b0)
bSb0 <- sum(b0*Sb0) ## penalty at start of beta step
#Sb0 <- Sl.Sb(Sl,lsp.full,b0)
#bSb0 <- sum(b0*Sb0) ## penalty at start of beta step
rSb0 <- Sl.rSb(Sl,lsp.full,b0)
bSb0 <- sum(rSb0^2)
## get deviance at step start, with current theta if efam
dev0 <- if (efam) sum(family$dev.resids(G$y,mu0,G$w,theta)) else
sum(family$dev.resids(G$y,mu0,G$w))
......@@ -551,10 +554,12 @@ bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
dev <- if (efam) sum(family$dev.resids(G$y,mu,G$w,theta)) else
sum(family$dev.resids(G$y,mu,G$w))
if (iter>2) { ## coef step length control
bSb <- sum(prop$beta*Sb) ## penalty at end of beta step
if (dev0 + bSb0 < dev + bSb && kk < 30) { ## beta step not improving current pen dev
#bSb <- sum(prop$beta*Sb) ## penalty at end of beta step
bSb <- sum(rSb^2) ## penalty at end of beta step
if ((!is.finite(dev) || dev0 + bSb0 < dev + bSb) && kk < 30) { ## beta step not improving current pen dev
coef <- (coef0 + coef)/2 ## halve the step
Sb <- (Sb0 + Sb)/2
#Sb <- (Sb0 + Sb)/2
rSb <- (rSb0 + rSb)/2
eta <- (eta0 + eta)/2
prop$beta <- (b0 + prop$beta)/2
kk <- kk + 1
......@@ -567,7 +572,8 @@ bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
eta0 <- eta
mu0 <- mu
b0 <- prop$beta ## beta repara
dev <- dev + sum(prop$beta*Sb) ## add penalty to deviance
#dev <- dev + sum(prop$beta*Sb) ## add penalty to deviance
dev <- dev + sum(rSb^2)
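## note: sum(rSb^2) equals the penalty beta'S beta but is non-negative by
## construction, which is what makes the penalty evaluation proof against
## round-off negativity (see the 1.8-24 ChangeLog entry on bgam.fitd above)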
} else reml <- dev ## for convergence checking
if (efam) { ## extended family
......@@ -2113,7 +2119,7 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
} else { ## not a tensor smooth
v[[kb]] <- rep(0,0)
dt[kb] <- dt[kb] + 1
G$Xd[[k]] <- G$X[1:dk$nr[k],G$smooth[[i]]$first.para:G$smooth[[i]]$last.para]
G$Xd[[k]] <- G$X[1:dk$nr[k],G$smooth[[i]]$first.para:G$smooth[[i]]$last.para,drop=FALSE]
k <- k + 1
}
kb <- kb + 1
......
......@@ -39,6 +39,7 @@ cox.ph <- function (link = "identity") {
G$X <- G$X[y.order,,drop=FALSE]
attributes(G$X) <- attrX
G$w <- G$w[y.order]
G$offset <- G$offset[y.order]
})
postproc <- expression({
......@@ -46,7 +47,7 @@ cox.ph <- function (link = "identity") {
## baseline hazard estimation...
## first get the estimated hazard and prediction information...
G$X <- Sl.initial.repara(G$Sl,G$X,inverse=TRUE,cov=FALSE,both.sides=FALSE)
object$family$data <- G$family$hazard(G$y,G$X,object$coefficients,G$w)
object$family$data <- G$family$hazard(G$y,G$X,object$coefficients,G$w,G$offset)
rumblefish <- G$family$hazard(G$y,matrix(0,nrow(G$X),0),object$coefficients,G$w)
s0.base <- exp(-rumblefish$h[rumblefish$r]) ## no model baseline survival
s0.base[s0.base >= 1] <- 1 - 2*.Machine$double.eps ## avoid NA later
......@@ -69,7 +70,7 @@ cox.ph <- function (link = "identity") {
if (is.null(start)) start <- rep(0,ncol(x))
})
hazard <- function(y, X,beta,wt) {
hazard <- function(y, X,beta,wt,offset=0) {
## get the baseline hazard function information, given times in descending order in y
## model matrix (same ordering) in X, coefs in beta and censoring in wt (1 = death, 0
## = censoring)
......@@ -96,7 +97,7 @@ cox.ph <- function (link = "identity") {
}
#tr <- unique(y);nt <- length(tr)
p <- ncol(X)
eta <- as.double(X%*%beta)
eta <- as.double(X%*%beta) + offset
if (ns==1) {
r <- match(y,tr)
oo <- .C("coxpp",eta,A=as.double(X),as.integer(r),d=as.integer(wt),
......@@ -156,9 +157,10 @@ cox.ph <- function (link = "identity") {
}
if (sum(is.na(y))>0) stop("NA times supplied for cox.ph prediction")
X <- X[ii,,drop=FALSE];y <- y[ii];
n <- nrow(X)
if (is.null(off)) off <- rep(0,n)
if (is.null(strat)) {
n <- nrow(X)
oo <- .C("coxpred",as.double(X),t=as.double(y),as.double(beta),as.double(Vb),
oo <- .C("coxpred",as.double(X),t=as.double(y),as.double(beta),as.double(off),as.double(Vb),
a=as.double(family$data$a),h=as.double(family$data$h),q=as.double(family$data$q),
tr = as.double(family$data$tr),
n=as.integer(n),p=as.integer(ncol(X)),nt = as.integer(family$data$nt),
......@@ -175,7 +177,7 @@ cox.ph <- function (link = "identity") {
ind <- which(strat==pstrata[i]) ## prediction data index
trind <- which(family$data$tr.strat == pstrata[i])
n <- length(ind)
oo <- .C("coxpred",as.double(X[ind,]),t=as.double(y[ind]),as.double(beta),as.double(Vb),
oo <- .C("coxpred",as.double(X[ind,]),t=as.double(y[ind]),as.double(beta),as.double(off),as.double(Vb),
a=as.double(family$data$a[,trind]),h=as.double(family$data$h[trind]),q=as.double(family$data$q[trind]),
tr = as.double(family$data$tr[trind]),
n=as.integer(n),p=as.integer(p),nt = as.integer(length(trind)),
......@@ -205,7 +207,8 @@ cox.ph <- function (link = "identity") {
## or its Choleski factor
## D is the diagonal pre-conditioning matrix used to obtain Hp
## if Hr is the raw Hp then Hp = D*t(D*Hr)
if (!is.null(offset)&&sum(offset!=0)) stop("cox.ph does not yet handle offsets")
#!! if (!is.null(offset)&&sum(offset!=0)) stop("cox.ph does not yet handle offsets")
if (is.matrix(y)) {
## first column is time, second is *numeric* code indicating strata
......@@ -217,7 +220,7 @@ cox.ph <- function (link = "identity") {
p <- ncol(X)
deriv <- deriv - 1
mu <- X%*%coef
mu <- X%*%coef + offset
g <- rep(0,p);H <- rep(0,p*p)
if (deriv > 0) {
M <- ncol(d1b)
......
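
A hedged sketch of the new cox.ph offset support (hypothetical data; supplying
the offset through the model formula is an assumption consistent with other
gam families):

library(mgcv)
set.seed(3)
sim <- data.frame(time  = rexp(100),
                  x     = runif(100),
                  death = rbinom(100, 1, 0.7),  ## 1 = event, 0 = right censored
                  o     = runif(100))           ## known log hazard-ratio offset
## the censoring indicator is supplied via 'weights', as documented for cox.ph
b <- gam(time ~ s(x) + offset(o), family = cox.ph(),
         data = sim, weights = death)
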
## (c) Simon N. Wood (ocat, tw, nb, ziP) & Natalya Pya (scat, beta),
## (c) Simon N. Wood & Natalya Pya (scat, beta),
## 2013-2017. Released under GPL2.
## See gam.fit4.r for testing functions fmud.test and fetad.test.
estimate.theta <- function(theta,family,y,mu,scale=1,wt=1,tol=1e-7,attachH=FALSE) {
## Simple Newton iteration to estimate theta for an extended family,
## given y and mu. To be iterated with estimation of mu given theta.
## If used within a PIRLS loop then divergence testing of coef update
## will have to re-compute pdev after theta update.
## Not clear best way to handle scale - could oprimize here as well
## Not clear best way to handle scale - could optimize here as well
if (!inherits(family,"extended.family")) stop("not an extended family")
......@@ -115,15 +116,15 @@ find.null.dev <- function(family,y,eta,offset,weights) {
## function fix.family.link.extended.family with functions
## gkg where k is 2,3 or 4 giving the kth derivative of the
## link over the first derivative of the link to the power k.
## for non standard links these functions muct be supplied.
## for non standard links these functions must be supplied.
## dev.resids - function computing deviance residuals.
## Dd - function returning derivatives of deviance residuals w.r.t. mu and theta.
## aic - function computing twice - log likelihood for 2df to be added to.
## aic - function computing twice -ve log likelihood for 2df to be added to.
## initialize - expression to be evaluated in gam.fit4 and initial.spg
## to initialize mu or eta.
## preinitialize - optional function of y and family, returning a list with optional elements
## Theta - intitial Theta and y - modified y for use in fitting (see e.g. ocat and betar)
## postproc - function with arguments family,y,prior.weights,fitted,linear.predictors,offset,intercept
## postproc - function with arguments family, y, prior.weights, fitted, linear.predictors, offset, intercept
## to call after fit to compute (optionally) the label for the family, deviance and null deviance.
## See ocat for simple example and betar or ziP for complicated. Called in estimate.gam.
## ls - function to evaluated log saturated likelihood and derivatives w.r.t.
......@@ -140,7 +141,7 @@ find.null.dev <- function(family,y,eta,offset,weights) {
## predict - optional function for predicting from model, called by predict.gam.
## family$data - optional list storing any family specific data for use, e.g. in predict
## function. - deprecated (commented out below - appears to be used nowhere)
## scale - < 0 to estimate. ignored if NULL
## extended family object for ordered categorical
......@@ -254,7 +255,7 @@ ocat <- function(theta=NULL,link="identity",R=NULL) {
} ## end of dev.resids
Dd <- function(y, mu, theta, wt=NULL, level=0) {
## derivatives of the deviance...
## derivatives of the ocat deviance...
# F <- function(x) { ## e^(x)/(1+e^x) without overflow
# h <- ind <- x > 0; h[ind] <- 1/(exp(-x[ind]) + 1)
# x <- exp(x[!ind]); h[!ind] <- (x/(1+x))
......@@ -387,7 +388,8 @@ ocat <- function(theta=NULL,link="identity",R=NULL) {
oo$Dth[ind,k] <- Da0[ind]*etk
oo$Dmuth[ind,k] <- Dmua0[ind]*etk
oo$Dmu2th[ind,k] <- Dmu2a0[ind]*etk
}
}
oo$EDmu2th <- oo$Dmu2th
}
if (level >1) { ## and the second derivative components
oo$Dmu4 <- 2*((3*b^2 + 4*a*c)/f + a2*(6*a2/f - 12*b)/f2 - d)/f
......@@ -440,7 +442,7 @@ ocat <- function(theta=NULL,link="identity",R=NULL) {
}
}
oo
} ## end of Dd
} ## end of Dd (ocat)
aic <- function(y, mu, theta=NULL, wt, dev) {
......@@ -473,77 +475,7 @@ ocat <- function(theta=NULL,link="identity",R=NULL) {
ls <- function(y,w,theta,scale) {
## the log saturated likelihood function.
#! actually only first line used since re-def as 0
#vec <- !is.null(attr(theta,"vec.grad"))
#lsth1 <- if (vec) matrix(0,length(y),R-2) else rep(0,R-2)
return(list(ls=0,lsth1=rep(0,R-2),lsth2=matrix(0,R-2,R-2)))
F <- function(x) {
h <- ind <- x > 0; h[ind] <- 1/(exp(-x[ind]) + 1)
x <- exp(x[!ind]); h[!ind] <- (x/(1+x))
h
}
Fdiff <- function(a,b) {
## cancellation resistant F(b)-F(a), b>a
h <- rep(1,length(b)); h[b>0] <- -1; eb <- exp(b*h)
h <- h*0+1; h[a>0] <- -1; ea <- exp(a*h)
ind <- b<0;bi <- eb[ind];ai <- ea[ind]
h[ind] <- bi/(1+bi) - ai/(1+ai)
ind1 <- a>0;bi <- eb[ind1];ai <- ea[ind1]
h[ind1] <- (ai-bi)/((ai+1)*(bi+1))
ind <- !ind & !ind1;bi <- eb[ind];ai <- ea[ind]
h[ind] <- (1-ai*bi)/((bi+1)*(ai+1))
h
}
R = length(theta)+2
alpha <- rep(0,R+1) ## the thresholds
alpha[1] <- -Inf;alpha[R+1] <- Inf
alpha[2] <- -1
if (R > 2) {
ind <- 3:R
alpha[ind] <- alpha[2] + cumsum(exp(theta))
}
al1 <- alpha[y+1];al0 = alpha[y]
g1 <- F((al1-al0)/2);g0 <- F((al0-al1)/2)
##A <- pmax(g1 - g0,.Machine$double.eps)
A <- Fdiff((al0-al1)/2,(al1-al0)/2)
ls <- sum(log(A))
B <- g1^2 - g1 + g0^2 - g0
C <- 2 * g1^3 - 3 * g1^2 + g1 - 2 * g0^3 + 3 * g0^2 - g0
Da0 <- .5 * B/A ; Da1 <- -0.5 *B/A
Da0a0 <- .25 * C/A - .25 * B^2/A^2
Da1a1 <- .25 * C/A - .25 * B^2/A^2
Da0a1 <- - .25 * C/A + .25 * B^2/A^2
i <- 0
n2d <- (R-2)*(R-1)/2
n <- length(y)
Dth <- matrix(0,n,R-2)
Dth2 <- matrix(0,n,n2d)
for (j in 1:(R-2)) for (k in j:(R-2)) {
i <- i + 1 ## the second deriv col
ind <- y >= j ## rest are zero
ar1.k <- ar.k <- rep(exp(theta[k]),n)
ar.k[y==R | y <= k] <- 0; ar1.k[y<k+2] <- 0
ar.j <- ar1.j <- rep(exp(theta[j]),n)
ar.j[y==R | y <= j] <- 0; ar1.j[y<j+2] <- 0
ar.kj <- ar1.kj <- rep(0,n)
if (k==j) {
ar.kj[y>k&y<R] <- exp(theta[k])
ar1.kj[y>k+1] <- exp(theta[k])
Dth[ind,k] <- Da1[ind]*ar.k[ind] + Da0[ind]*ar1.k[ind]
}
Dth2[,i] <- Da1a1*ar.k*ar.j + Da0a1*ar.k*ar1.j + Da1 * ar.kj +
Da0a0*ar1.k*ar1.j + Da0a1*ar1.k*ar.j + Da0 * ar1.kj
}
lsth2=colSums(Dth2)
if (R>2) {
ls2 <- matrix(0,R-2,R-2);ii <- 0
for (i in 1:(R-2)) for (j in i:(R-2)) {
ii <- ii + 1
ls2[i,j] <- ls2[j,i] <- lsth2[ii]
}
}
list(ls=ls,lsth1=colSums(Dth),lsth2=ls2)
} ## end of ls
## initialization is interesting -- needs to be with reference to initial cut-points
......@@ -748,7 +680,7 @@ nb <- function (theta = NULL, link = "log") {
}
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the deviance...
## derivatives of the nb deviance...
##ltheta <- theta
theta <- exp(theta)
yth <- y + theta
......@@ -765,6 +697,7 @@ nb <- function (theta = NULL, link = "log") {
r$Dmuth <- 2 * wt * theta * (1 - yth/muth)/muth
r$Dmu3 <- 4 * wt * (yth/muth^3 - y/mu^3)
r$Dmu2th <- 2 * wt * theta * (2*yth/muth - 1)/muth^2
r$EDmu2th <- 2 * wt / muth^2
}
if (level>1) { ## whole damn lot
r$Dmu4 <- 2 * wt * (6*y/mu^4 - 6*yth/muth^4)
......@@ -918,7 +851,7 @@ tw <- function (theta = NULL, link = "log",a=1.01,b=1.99) {
}
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the deviance...
## derivatives of the tw deviance...
a <- get(".a");b <- get(".b")
th <- theta
p <- if (th>0) (b+a*exp(-th))/(1+exp(-th)) else (b*exp(th)+a)/(exp(th)+1)
......@@ -1116,7 +1049,7 @@ betar <- function (theta = NULL, link = "logit",eps=.Machine$double.eps*100) {
r$Dth <- 2 * wt *theta*(-mu*log.yoney - log1p(-y)+ mu*psi0.muth+onemu*psi0.onemuth -psi0.th)
r$Dmuth <- r$Dmu + 2 * wt * theta^2*(mu*psi1.muth -onemu*psi1.onemuth)
r$Dmu3 <- 2 * wt *theta^3 * (psi2.muth - psi2.onemuth)
r$Dmu2th <- 2* r$Dmu2 + 2 * wt * theta^3* (mu*psi2.muth + onemu*psi2.onemuth)
r$EDmu2th <- r$Dmu2th <- 2* r$Dmu2 + 2 * wt * theta^3* (mu*psi2.muth + onemu*psi2.onemuth)
}
if (level>1) { ## whole lot
r$Dmu4 <- 2 * wt *theta^4 * (psi3.muth+psi3.onemuth)
......@@ -1387,7 +1320,7 @@ scat <- function (theta = NULL, link = "identity",min.df = 3) {
}
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the deviance...
## derivatives of the scat deviance...
## ltheta <- theta
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
......@@ -1847,6 +1780,7 @@ ziP <- function (theta = NULL, link = "identity",b=0) {
mu <- object$linear.predictors
wts <- object$prior.weights
res <- object$family$dev.resids(y,mu,wts)
## next line is correct as function returns -2*saturated.log.lik
res <- res - object$family$saturated.ll(y,object$family,wts)
fv <- predict.gam(object,type="response")
s <- attr(res,"sign")
......@@ -1854,7 +1788,7 @@ ziP <- function (theta = NULL, link = "identity",b=0) {
res <- as.numeric(sqrt(pmax(res,0)) * s)
}
res
} ## residuals
} ## residuals (ziP)
predict <- function(family,se=FALSE,eta=NULL,y=NULL,X=NULL,
beta=NULL,off=NULL,Vb=NULL) {
......
......@@ -241,7 +241,7 @@ Sl.Sb <- function(Sl,rho,beta) {
if (length(Sl[[b]]$S)==1) { ## singleton - multiple of identity
a[ind] <- a[ind] + beta[ind] * exp(rho[k])
k <- k + 1
} else { ## mulit-S block
} else { ## multi-S block
for (j in 1:length(Sl[[b]]$S)) {
a[ind] <- a[ind] + exp(rho[k]) * (Sl[[b]]$S[[j]] %*% beta[ind])
k <- k + 1
......@@ -251,6 +251,37 @@ Sl.Sb <- function(Sl,rho,beta) {
a
} ## Sl.Sb
Sl.rSb <- function(Sl,rho,beta) {
## Computes vector 'a' containing all terms rS %*% beta stacked end to end.
## The sum of squares of 'a' is bSb, but 'a' is linear in beta.
## Assumes initial re-parameterization has taken
## place, so single penalties are multiples of identity and uses rS for
## multi-S blocks. Logic is identical to Sl.addS.
k <- 1 ## sp counter
kk <- 0 ## total length of returned vector
if (length(Sl)>0) for (b in 1:length(Sl)) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
kk <- kk + length(Sl[[b]]$S)*length(ind)
}
a <- rep(0,kk)
kk <- 0
if (length(Sl)>0) for (b in 1:length(Sl)) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (length(Sl[[b]]$S)==1) { ## singleton - multiple of identity
a[kk + 1:length(ind)] <- beta[ind] * exp(rho[k]/2)
k <- k + 1
kk <- kk + length(ind)
} else { ## multi-S block
for (j in 1:length(Sl[[b]]$S)) {
a[kk + 1:length(ind)] <- exp(rho[k]/2) * (beta[ind] %*% Sl[[b]]$rS[[j]])
k <- k + 1
kk <- kk + length(ind)
}
}
}
a
} ## Sl.rSb
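
A toy plain-matrix check (not using the internal Sl structure) of the identity
the function relies on: with S = rS %*% t(rS), the sum of squares of the
stacked rS terms recovers the penalty beta'S beta:

set.seed(2)
rS   <- matrix(rnorm(12), 4, 3)   ## any root with S = rS %*% t(rS)
S    <- rS %*% t(rS)
beta <- rnorm(4)
a    <- drop(beta %*% rS)         ## linear in beta, as in Sl.rSb above
all.equal(drop(t(beta) %*% S %*% beta), sum(a^2))  ## TRUE
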
Sl.initial.repara <- function(Sl,X,inverse=FALSE,both.sides=TRUE,cov=TRUE,nt=1) {
## Routine to apply initial Sl re-parameterization to model matrix X,
## or, if inverse==TRUE, to apply inverse re-para to parameter vector
......@@ -879,7 +910,7 @@ Sl.fitChol <- function(Sl,XX,f,rho,yy=0,L=NULL,rho0=0,log.phi=0,phi.fixed=TRUE,
uconv.ind <- (abs(reml1) > tol)|(abs(diag(reml2))>tol)
hess <- reml2
grad <- reml1
if (length(grad)>0) {
if (length(grad)>0&&sum(uconv.ind)>0) {
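## (the added sum(uconv.ind)>0 condition skips the update entirely when
## every sp appears converged, avoiding zero-dimension subsetting below)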
if (sum(uconv.ind)!=ncol(reml2)) {
reml1 <- reml1[uconv.ind]
reml2 <- reml2[uconv.ind,uconv.ind]
......
......@@ -118,7 +118,7 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
if (family$link==family$canonical) fisher <- TRUE else fisher=FALSE
## ... if canonical Newton = Fisher, but Fisher cheaper!
if (scale>0) scale.known <- TRUE else scale.known <- FALSE
if (!scale.known&&scoreType%in%c("REML","ML")) { ## the final element of sp is actually log(scale)
if (!scale.known&&scoreType%in%c("REML","ML","EFS")) { ## the final element of sp is actually log(scale)
nsp <- length(sp)
scale <- exp(sp[nsp])
sp <- sp[-nsp]
......@@ -137,9 +137,10 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
q <- ncol(x)
if (length(UrS)) { ## find a stable reparameterization...
grderiv <- deriv*as.numeric(scoreType%in%c("REML","ML","P-REML","P-ML"))
grderiv <- if (scoreType=="EFS") 1 else deriv*as.numeric(scoreType%in%c("REML","ML","P-REML","P-ML"))
rp <- gam.reparam(UrS,sp,grderiv) ## note also detects fixed penalty if present
## Following is for debugging only...
# deriv.check <- FALSE
......@@ -177,7 +178,8 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
rows.E <- q-Mp
Sr <- cbind(rp$E,matrix(0,nrow(rp$E),Mp))
St <- rbind(cbind(rp$S,matrix(0,nrow(rp$S),Mp)),matrix(0,Mp,q))
} else {
} else {
grderiv <- 0
T <- diag(q);
St <- matrix(0,q,q)
rSncol <- sp <- rows.E <- Eb <- Sr <- 0
......@@ -185,7 +187,12 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
rp <- list(det=0,det1 = rep(0,0),det2 = rep(0,0),fixed.penalty=FALSE)
}
iter <- 0;coef <- rep(0,ncol(x))
if (scoreType=="EFS") {
scoreType <- "REML" ## basically optimizing REML
deriv <- 0 ## only derivatives of log|S|_+ required (see above)
}
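## (as the comments above note, EFS evaluates the REML criterion itself;
## the log|S|_+ derivatives it needs come from gam.reparam via grderiv=1,
## so no derivatives of the REML score w.r.t. the sp's are computed here)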
conv <- FALSE
n <- nobs <- NROW(y) ## n is just to keep codetools happy
if (n.true <= 0) n.true <- nobs ## n.true is used in criteria in place of nobs
......@@ -773,7 +780,7 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
boundary = boundary,D1=D1,D2=D2,P=P,P1=P1,P2=P2,trA=trA,trA1=trA1,trA2=trA2,
GCV=GCV,GCV1=GCV1,GCV2=GCV2,GACV=GACV,GACV1=GACV1,GACV2=GACV2,UBRE=UBRE,
UBRE1=UBRE1,UBRE2=UBRE2,REML=REML,REML1=REML1,REML2=REML2,rV=rV,db.drho=db.drho,
dw.drho=dw.drho,dVkk = matrix(oo$dVkk,nSp,nSp),
dw.drho=dw.drho,dVkk = matrix(oo$dVkk,nSp,nSp),ldetS1 = if (grderiv) rp$det1 else 0,
scale.est=scale.est,reml.scale= reml.scale,aic=aic.model,rank=oo$rank.est,K=Kmat)
} ## end gam.fit3
......@@ -1338,8 +1345,10 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
## tiny relative to largest, as this space is likely to be poorly
## modelled on scale of Newton step...
uconv.ind <- uconv.ind & abs(grad)>max(abs(grad))*.001
uconv.ind1 <- uconv.ind & abs(grad)>max(abs(grad))*.001
if (sum(uconv.ind1)==0) uconv.ind1 <- uconv.ind ## nothing left reset
if (sum(uconv.ind)==0) uconv.ind[which(abs(grad)==max(abs(grad)))] <- TRUE ## need at least 1 to update
## exclude apparently converged gradients from computation
hess1 <- hess[uconv.ind,uconv.ind]
grad1 <- grad[uconv.ind]
......@@ -1776,8 +1785,22 @@ bfgs <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
score <- b$UBRE;grad <- t(L)%*%b$UBRE1
} else { ## default to deviance based GCV
score <- b$GCV;grad <- t(L)%*%b$GCV1;
}
## dVkk only refers to smoothing parameters, but sp may contain
## extra parameters at start and scale parameter at end. Have
## to reduce L accordingly...
if (!is.null(family$n.theta)&&family$n.theta>0) {
ind <- 1:family$n.theta
nind <- ncol(L) - family$n.theta - if (family$n.theta + nrow(b$dVkk)<nrow(L)) 1 else 0
spind <- if (nind>0) family$n.theta+1:nind else rep(0,0)
rspind <- family$n.theta + 1:nrow(b$dVkk)
} else {
nind <- ncol(L) - if (nrow(b$dVkk)<nrow(L)) 1 else 0
spind <- if (nind>0) 1:nind else rep(0,0) ## index of smooth parameters
rspind <- 1:nrow(b$dVkk)
}
L0 <- if (nrow(L)==nrow(b$dVkk)) L else L[-nrow(L),-ncol(L)]
L0 <- L[rspind,spind] ##if (nrow(L)!=nrow(b$dVkk)) L[spind,spind] else L
initial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0)
initial$score <- score;initial$grad <- grad;
initial$scale.est <- b$scale.est
......@@ -2004,9 +2027,10 @@ bfgs <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
else score.scale <- abs(trial$scale.est) + abs(trial$score) ##trial$dev/nrow(X) + abs(trial$score)
uconv.ind <- abs(trial$grad) > score.scale*conv.tol
if (sum(uconv.ind)) converged <- FALSE
if (length(uconv.ind)>length(trial$dVkk)) trial$dVkk <- c(trial$dVkk,score.scale)
#if (length(uconv.ind)>length(trial$dVkk)) trial$dVkk <- c(trial$dVkk,score.scale)
## following must be tighter than convergence...
uconv.ind <- abs(trial$grad) > score.scale*conv.tol*.1 | abs(trial$dVkk) > score.scale * conv.tol*.1
uconv.ind <- abs(trial$grad) > score.scale*conv.tol*.1
uconv.ind[spind] <- uconv.ind[spind] | abs(trial$dVkk) > score.scale * conv.tol*.1
if (abs(initial$score-trial$score) > score.scale*conv.tol) {
if (!sum(uconv.ind)) uconv.ind <- uconv.ind | TRUE ## otherwise can't progress
converged <- FALSE
......@@ -2045,10 +2069,11 @@ bfgs <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
trial$dscore <- sum(trial$grad*step)
trial$scale.est <- b$scale.est
trial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0) ## curvature testing matrix
if (length(uconv.ind)>length(trial$dVkk)) trial$dVkk <- c(trial$dVkk,score.scale)
#if (length(uconv.ind)>length(trial$dVkk)) trial$dVkk <- c(trial$dVkk,score.scale)
rm(b);counter <- counter + 1
## note that following rolls back until there is clear signal in derivs...
uconv.ind0 <- abs(trial$grad) > score.scale*conv.tol*20 | abs(trial$dVkk) > score.scale * conv.tol * 20
uconv.ind0 <- abs(trial$grad) > score.scale*conv.tol*20
uconv.ind0[spind] <- uconv.ind0[spind] | abs(trial$dVkk) > score.scale * conv.tol * 20
uconv.ind0 <- uconv.ind0 | uconv.ind ## make sure we don't start rolling back unproblematic sps
}
uconv.ind <- uconv.ind | TRUE
......
......@@ -987,7 +987,7 @@ extract.lme.cov2<-function(b,data,start.level=1)
}
}
list(V=V,ind=Cind)
}
} ## extract.lme.cov2
extract.lme.cov<-function(b,data,start.level=1)
# function to extract the response data covariance matrix from an lme fitted
......@@ -1071,7 +1071,7 @@ extract.lme.cov<-function(b,data,start.level=1)
V <- V+Z%*%Vr%*%t(Z)
}
V
}
} ## extract.lme.cov
formXtViX <- function(V,X)
## forms X'V^{-1}X as efficiently as possible given the structure of
......@@ -1147,7 +1147,7 @@ gammPQL <- function (fixed, random, family, data, correlation, weights,
zz <- eta + fit0$residuals - off
wz <- fit0$weights
fam <- family
## find non clashing name for pseudodata and insert in formula
zz.name <- new.name("zz",names(data))
eval(parse(text=paste("fixed[[2]] <- quote(",zz.name,")")))
......@@ -1188,6 +1188,13 @@ gammPQL <- function (fixed, random, family, data, correlation, weights,
if (!converged) warning("gamm not converged, try increasing niterPQL")
fit$y <- fit0$y
fit$w <- w ## prior weights
## would require full edf to be computable with re terms included!
#if (is.null(correlation)) { ## then a conditional AIC is possible
# y <- fit$y;weights <- w; nobs <- length(y)
# eval(fam$initialize)
# dev <- sum(fam$dev.resids(y,mu,w))
# fit$aic <- fam$aic(y,n,mu,w,dev)
#}
fit
}