Commit 3e6d87f8 authored by Dirk Eddelbuettel

Import Upstream version 1.8-18

parent a9b37307
@@ -5,6 +5,60 @@ Currently deprecated and liable to be removed:
- single penalty tensor product smooths.
- p.type!=0 in summary.gam.
1.8-18
* Tweak to 'newton' to further reduce the chance of false convergence at an
indefinite point.
* Fix to bam.update to deal with NAs in response.
* 'scat' family now takes a 'min.df' argument which defaults to 3. Otherwise
LAML could occasionally become indefinite as the df headed towards 2 (see the
usage sketch after this version's entries).
* Fix to `gam.fit4' where in rare circumstances the PIRLS iteration could
finish at an indefinite point, spoiling implicit differentiation.
* `gam.check' modified to fix a couple of issues with `gamm' fitted models, and
to warn that interpretability is reduced for such models.
* `qq.gam' default method: slight modification to the default generation of
reference quantiles. In theory the previous method could cause a problem if
enough residuals were exactly equal.
* Fix to `plot.mrf.smooth' to deal with models with by variables.
* `plot.gam' fix to handling of plot limits when using 'trans' (from 1.8-16
'trans' could be applied twice).
* `plot.gam' argument 'rug' now defaults to 'NULL', corresponding to 'rug=TRUE'
if the number of data points is <= 10000 and 'rug=FALSE' otherwise.
* bam(...,discrete=TRUE) could fail if NAs in the smooth terms caused data rows
to be dropped which led to parametric term factors having unused levels
(which were then not being dropped). Fixed (in discrete.mf).
* bam(...,discrete=TRUE,nthreads=n) now warns if n>1 and openMP is not
available on the platform being used.
* Sl.addS modified to use C code for some otherwise very slow matrix
subset and addition ops which could become rate limiting for
bam(...,discrete=TRUE).
* Parallel solves in Sl.iftChol can speed up bam(...,discrete=TRUE) with
large numbers of smoothing/variance parameters.
* 'gamm' now warns if called with extended families.
* disastrous 'te' in place of 'ti' typo in ?smooth.terms fixed thanks to
John McKinlay.
* Some `internal' functions exported to facilitate quantile gam methods
in separate package.
* Minor fix in gam.fit5 - 1 by 1 matrix coerced to scalar, to prevent failure
in some circumstances.
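A hedged usage sketch of the new 'scat' argument mentioned above (simulated
data via gamSim; model and values purely illustrative):

    library(mgcv)
    set.seed(1)
    dat <- gamSim(1, n = 200, dist = "normal")
    ## df estimation is bounded below by min.df (here 4; default 3)
    b <- gam(y ~ s(x0) + s(x1), family = scat(min.df = 4), data = dat)
    b$family$getTheta(TRUE)  ## fitted (df, scale) on the original scale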
1.8-17
* Export gamlss.etamu, gamlss.gH and trind.generator to facilitate user
......
Package: mgcv
Version: 1.8-17
Version: 1.8-18
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with GCV/AIC/REML Smoothness
Title: Mixed GAM Computation Vehicle with Automatic Smoothness
Estimation
Description: GAMs, GAMMs and other generalized ridge regression with
multiple smoothing parameter estimation by GCV, REML or UBRE/AIC.
Includes a gam() function, a wide variety of smoothers, JAGS
support and distributions beyond the exponential family.
Description: Generalized additive (mixed) models, some of their extensions and
other generalized ridge regression with multiple smoothing
parameter estimation by (REstricted) Marginal Likelihood,
Generalized Cross Validation and similar. Includes a gam()
function, a wide variety of smoothers, JAGS support and
distributions beyond the exponential family.
Priority: recommended
Depends: R (>= 2.14.0), nlme (>= 3.1-64)
Imports: methods, stats, graphics, Matrix
@@ -16,6 +18,6 @@ LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
NeedsCompilation: yes
Packaged: 2017-02-06 11:03:57 UTC; sw283
Packaged: 2017-07-26 15:00:29 UTC; sw283
Repository: CRAN
Date/Publication: 2017-02-08 21:30:16
Date/Publication: 2017-07-28 07:28:50 UTC
useDynLib(mgcv, .registration = TRUE, .fixes = "C_")
export(anova.gam, bam, bam.update,bandchol, betar, cox.ph,concurvity,
cSplineDes,
cSplineDes,dDeta,
exclude.too.far,extract.lme.cov, extract.lme.cov2,
formXtViX, full.score, formula.gam,fixDependence,fix.family.link,
fix.family.var, fix.family.ls, fix.family.qf,fix.family.rd,
fs.test,fs.boundary,gam, gam2derivative,
gam2objective,
gamm, gam.check, gam.control,gam.fit3,
gam.fit,
gam.fit,gam.fit5.post.proc,
gamlss.etamu,gamlss.gH,
gam.outer,gam.vcomp, gamSim ,
gam.outer,gam.reparam, gam.vcomp, gamSim ,
gaulss,gam.side,get.var,gevlss,
influence.gam,
in.out,inSide,interpret.gam,initial.sp,
jagam,
jagam,ldetS,
ldTweedie,
logLik.gam,ls.size,
magic, magic.post.proc, model.matrix.gam,
magic, magic.post.proc, model.matrix.gam,mini.roots,
mono.con, mroot, multinom, mvn, nb, negbin, new.name,
notExp,notExp2,notLog,notLog2,pcls,null.space.dimension,
ocat,
@@ -45,7 +45,7 @@ export(anova.gam, bam, bam.update,bandchol, betar, cox.ph,concurvity,
qq.gam,
residuals.gam,rig,rTweedie,rmvn,
Rrank,s,scat,sdiag,"sdiag<-",
sim2jam,
sim2jam,Sl.initial.repara,Sl.repara,Sl.setup,
slanczos,smooth2random,
smoothCon,smooth.construct,smooth.construct2,
smooth.construct.bs.smooth.spec,
@@ -70,7 +70,7 @@ export(anova.gam, bam, bam.update,bandchol, betar, cox.ph,concurvity,
summary.gam,sp.vcov,
spasm.construct,spasm.sp,spasm.smooth,
t2,te,ti,tensor.prod.model.matrix,tensor.prod.penalties,
trichol,trind.generator,
totalPenaltySpace,trichol,trind.generator,
Tweedie,tw,uniquecombs, vcov.gam, vis.gam, ziP, ziplss)
importFrom(grDevices,cm.colors,dev.interactive,devAskNewPage,gray,grey,heat.colors,terrain.colors,topo.colors)
......
@@ -184,9 +184,10 @@ check.term <- function(term,rec) {
} else return(0) ## no match
} ## check.term
discrete.mf <- function(gp,mf,pmf,m=NULL,full=TRUE) {
discrete.mf <- function(gp,mf,names.pmf,m=NULL,full=TRUE) {
## discretize the covariates for the terms specified in smooth.spec
## id not allowed. pmf is a model frame for just the
## id not allowed. names.pmf gives the names of the parametric part
## of mf, and is used to create a model frame for just the
## parametric terms --- mini.mf is applied to this.
## if full is FALSE then parametric and response terms are ignored
## and what is returned is a list where columns can be of
@@ -282,8 +283,17 @@ discrete.mf <- function(gp,mf,pmf,m=NULL,full=TRUE) {
## padding is necessary if gam.setup is to be used for setup
if (full) {
maxr <- max(nr)
pmf0 <- mini.mf(pmf,maxr) ## deal with parametric components
maxr <- max(nr)
## If NA's caused rows to be dropped in mf, then they should
## also be dropped in pmf, otherwise we can end up with factors
## with more levels than unique observations, for example.
## The next couple of lines achieve this.
## find indices of terms in mf but not pmf...
di <- sort(which(!names(mf) %in% names.pmf),decreasing=TRUE)
## create copy of mf with only pmf variables...
mfp <- mf; for (i in di) mfp[[i]] <- NULL
#pmf0 <- mini.mf(pmf,maxr) ## deal with parametric components
pmf0 <- mini.mf(mfp,maxr) ## deal with parametric components
if (nrow(pmf0)>maxr) maxr <- nrow(pmf0)
mf0 <- c(mf0,pmf0) ## add parametric terms to end of mf0
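An illustrative aside in plain R (not mgcv internals) on the unused-level
problem the comments above describe:

    f <- factor(c("a", "b", "c"))
    f2 <- f[-3]               ## a row dropped, as an NA elsewhere might cause
    levels(f2)                ## still "a" "b" "c": the empty level survives
    ncol(model.matrix(~ f2))  ## 3: a column for a level with no observations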
@@ -1450,7 +1460,7 @@ predict.bamd <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,excl
## now discretize covariates...
if (convert2mf) newdata <- model.frame(object$dinfo$gp$fake.formula[-2],newdata)
dk <- discrete.mf(object$dinfo$gp,mf=newdata,pmf=NULL,full=FALSE)
dk <- discrete.mf(object$dinfo$gp,mf=newdata,names.pmf=NULL,full=FALSE)
Xd <- list() ### list of discrete model matrices...
if (object$nsdf>0) {
@@ -1645,6 +1655,7 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
warning("discretization only available with fREML")
} else {
if (!is.null(cluster)) warning("discrete method does not use parallel cluster - use nthreads instead")
if (nthreads>1 && !mgcv.omp()) warning("openMP not available: single threaded computation only")
}
}
if (method%in%c("fREML")&&!is.null(min.sp)) {
@@ -1728,7 +1739,7 @@ bam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,n
## and indices giving the discretized value for each element of model frame.
## 'discrete' can be null, or contain a discretization size, or
## a discretization size per smooth term.
dk <- discrete.mf(gp,mf,pmf,m=discrete)
dk <- discrete.mf(gp,mf,names(pmf),m=discrete)
mf0 <- dk$mf ## padded discretized model frame
sparse.cons <- 0 ## default constraints required for tensor terms
@@ -2050,9 +2061,10 @@ bam.update <- function(b,data,chunk.size=10000) {
stop("Model can not be updated")
}
gp<-interpret.gam(b$formula) # interpret the formula
X <- predict(b,newdata=data,type="lpmatrix",na.action=b$NA.action) ## extra part of model matrix
rownames(X) <- NULL
## next 2 lines problematic if there are missings in the response, so now constructed from mf below...
## X <- predict(b,newdata=data,type="lpmatrix",na.action=b$NA.action) ## extra part of model matrix
## rownames(X) <- NULL
cnames <- names(b$coefficients)
AR.start <- NULL ## keep R checks happy
@@ -2074,7 +2086,10 @@ bam.update <- function(b,data,chunk.size=10000) {
mf <- model.frame(gp$fake.formula,data,xlev=b$xlev,na.action=b$NA.action)
w <- rep(1,nrow(mf))
}
X <- predict(b,newdata=mf,type="lpmatrix",na.action=b$NA.action) ## extra part of model matrix
rownames(X) <- NULL
b$model <- rbind(b$model,mf) ## complete model frame --- old + new
## get response and offset...
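A minimal usage sketch of what the rearrangement above enables (simulated
data via gamSim; names illustrative):

    library(mgcv)
    set.seed(1)
    dat <- gamSim(1, n = 500)
    b <- bam(y ~ s(x0) + s(x1), data = dat)
    nd <- gamSim(1, n = 50)
    nd$y[1] <- NA            ## NA in the response of the update data
    b2 <- bam.update(b, nd)  ## previously problematic with missing responses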
@@ -2157,7 +2172,7 @@ bam.update <- function(b,data,chunk.size=10000) {
object <- list(coefficients=res$beta,edf=res$edf,edf1=res$edf1,edf2=res$edf2,##F=res$F,
gcv.ubre=fit$reml,hat=res$hat,outer.info=list(iter=fit$iter,
message=fit$conv),optimizer="fast-REML",rank=ncol(um$X),
Ve=NULL,Vp=res$V,Vc=res$Vc,db.drho=fit$d1b,scale.estimated = scale<=0)
Ve=res$Ve,Vp=res$Vp,Vc=res$Vc,db.drho=fit$d1b,scale.estimated = scale<=0)
if (scale<=0) { ## get sp's and scale estimate
nsp <- length(fit$rho)
object$sig2 <- object$scale <- exp(fit$rho[nsp])
......
@@ -1190,7 +1190,7 @@ betar <- function (theta = NULL, link = "logit",eps=.Machine$double.eps*100) {
## scaled t (Natalya Pya) ...
scat <- function (theta = NULL, link = "identity") {
scat <- function (theta = NULL, link = "identity",min.df = 3) {
## Extended family object for scaled t distribution
## length(theta)=2; log theta supplied.
## Written by Natalya Pya.
@@ -1211,11 +1211,14 @@ scat <- function (theta = NULL, link = "identity") {
## Theta <- NULL;
n.theta <- 2
if (!is.null(theta)&&sum(theta==0)==0) {
if (abs(theta[1])<2) stop("scaled t df must be >2")
if (abs(theta[1])<=min.df) {
min.df <- 0.9*abs(theta[1])
warning("Supplied df below min.df. min.df reset")
}
if (sum(theta<0)) {
iniTheta <- c(log(abs(theta[1])-2),log(abs(theta[2]))) ## initial theta supplied
iniTheta <- c(log(abs(theta[1])-min.df),log(abs(theta[2]))) ## initial theta supplied
} else { ## fixed theta supplied
iniTheta <- c(log(theta[1]-2),log(theta[2]))
iniTheta <- c(log(theta[1]-min.df),log(theta[2]))
n.theta <- 0 ## no thetas to estimate
}
} else iniTheta <- c(-2,-1) ## initial log theta value
@@ -1224,15 +1227,17 @@ scat <- function (theta = NULL, link = "identity") {
assign(".Theta", iniTheta, envir = env)
getTheta <- function(trans=FALSE) {
## trans transforms to the original scale...
th <- get(".Theta")
if (trans) { th <- exp(th); th[1] <- th[1] + 2 }
th <- get(".Theta");min.df <- get(".min.df")
if (trans) { th <- exp(th); th[1] <- th[1] + min.df }
th
}
putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function()))
assign(".min.df", min.df, envir = env)
variance <- function(mu) {
th <- get(".Theta")
nu <- exp(th[1])+2; sig <- exp(th[2])
min.df <- get(".min.df")
nu <- exp(th[1])+min.df; sig <- exp(th[2])
sig^2*nu/(nu-2)
}
@@ -1240,15 +1245,17 @@ scat <- function (theta = NULL, link = "identity") {
dev.resids <- function(y, mu, wt,theta=NULL) {
if (is.null(theta)) theta <- get(".Theta")
nu <- exp(theta[1])+2; sig <- exp(theta[2])
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
wt * (nu + 1)*log1p((1/nu)*((y-mu)/sig)^2)
}
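## Illustrative cross-check (not mgcv code): for wt = 1 the quantity above is
## twice the saturated-minus-attained scaled-t log likelihood, so it can be
## verified against stats::dt():
nu <- 5; sig <- 2; y0 <- 1.3; mu0 <- 0.4
l  <- dt((y0 - mu0)/sig, df = nu, log = TRUE) - log(sig)
ls <- dt(0, df = nu, log = TRUE) - log(sig)
all.equal((nu + 1)*log1p(((y0 - mu0)/sig)^2/nu), 2*(ls - l))  ## TRUE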
Dd <- function(y, mu, theta, wt, level=0) {
## derivatives of the deviance...
## ltheta <- theta
nu <- exp(theta[1])+2; sig <- exp(theta[2])
nu1 <- nu + 1; ym <- y - mu; nu2 <- nu - 2;
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
nu1 <- nu + 1; ym <- y - mu; nu2 <- nu - min.df;
a <- 1 + (ym/sig)^2/nu
oo <- list()
## get the quantities needed for IRLS.
@@ -1315,8 +1322,9 @@ scat <- function (theta = NULL, link = "identity") {
aic <- function(y, mu, theta=NULL, wt, dev) {
min.df <- get(".min.df")
if (is.null(theta)) theta <- get(".Theta")
nu <- exp(theta[1])+2; sig <- exp(theta[2])
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
term <- -lgamma((nu+1)/2)+ lgamma(nu/2) + log(sig*(pi*nu)^.5) +
(nu+1)*log1p(((y-mu)/sig)^2/nu)/2 ## `-'log likelihood for each observation
2 * sum(term * wt)
@@ -1324,7 +1332,9 @@ scat <- function (theta = NULL, link = "identity") {
ls <- function(y,w,n,theta,scale) {
## the log saturated likelihood function.
nu <- exp(theta[1])+2; sig <- exp(theta[2]); nu2 <- nu-2;
## (Note these are correct but do not correspond to NP notes)
min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2]); nu2 <- nu-min.df;
nu2nu <- nu2/nu; nu12 <- (nu+1)/2
term <- lgamma(nu12) - lgamma(nu/2) - log(sig*(pi*nu)^.5)
ls <- sum(term*w)
@@ -1358,20 +1368,22 @@ scat <- function (theta = NULL, link = "identity") {
n <- rep(1, nobs)
mustart <- y + (y == 0)*.1
})
postproc <- expression({
object$family$family <-
paste("Scaled t(",paste(round(object$family$getTheta(TRUE),3),collapse=","),")",sep="")
})
rd <- function(mu,wt,scale) {
## simulate data given fitted latent variable in mu
theta <- get(".Theta")
nu <- exp(theta[1])+2; sig <- exp(theta[2])
theta <- get(".Theta");min.df <- get(".min.df")
nu <- exp(theta[1])+min.df; sig <- exp(theta[2])
n <- length(mu)
stats::rt(n=n,df=nu)*sig + mu
}
environment(dev.resids) <- environment(aic) <- environment(getTheta) <-
environment(rd)<- environment(variance) <- environment(putTheta) <- env
environment(dev.resids) <- environment(aic) <- environment(getTheta) <- environment(Dd) <-
environment(ls) <- environment(rd)<- environment(variance) <- environment(putTheta) <- env
structure(list(family = "scaled t", link = linktemp, linkfun = stats$linkfun,
linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,variance=variance,postproc=postproc,
......
@@ -442,10 +442,34 @@ ldetS <- function(Sl,rho,fixed,np,root=FALSE,repara=TRUE,nt=1) {
} ## end ldetS
Sl.addS <- function(Sl,A,rho) {
## Routine to add total penalty to matrix A. Sl is smooth penalty
## list from Sl.setup, so initial reparameterizations have taken place,
## and should have already been applied to A using Sl.initial.repara
k <- 1
A <- A*1 ## force a copy to be made so that A not modified in calling env!!
for (b in 1:length(Sl)) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (length(Sl[[b]]$S)==1) { ## singleton
B <- exp(rho[k]);diag <- -1
er <- .Call(C_mgcv_madi,A,B,ind,diag)
## diag(A)[ind] <- diag(A)[ind] + exp(rho[k]) ## penalty is identity times sp
k <- k + 1
} else {
for (j in 1:length(Sl[[b]]$S)) {
B <- exp(rho[k]) * Sl[[b]]$S[[j]]; diag <- 0
.Call(C_mgcv_madi,A,B,ind,diag)
## A[ind,ind] <- A[ind,ind] + exp(rho[k]) * Sl[[b]]$S[[j]]
k <- k + 1
}
}
}
A
} ## Sl.addS
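## A pure-R stand-in for the C_mgcv_madi call above, with semantics assumed
## from the commented-out lines ('diag' < 0: add scalar B to the indexed
## diagonal; 'diag' == 0: add matrix B to the A[ind,ind] block). 'madi0' is
## hypothetical, not an mgcv function:
madi0 <- function(A, B, ind, diag) {
  if (diag < 0) diag(A)[ind] <- diag(A)[ind] + B ## identity penalty times sp
  else A[ind,ind] <- A[ind,ind] + B              ## general penalty block
  A ## R's copy-on-modify means the caller's A is untouched, unlike the C code
}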
Sl.addS0 <- function(Sl,A,rho) {
## Routine to add total penalty to matrix A. Sl is smooth penalty
## list from Sl.setup, so initial reparameterizations have taken place,
## and should have already been applied to A using Sl.initial.repara
k <- 1
for (b in 1:length(Sl)) {
@@ -461,7 +485,7 @@ Sl.addS <- function(Sl,A,rho) {
}
}
A
} ## Sl.addS
} ## Sl.addS0
Sl.repara <- function(rp,X,inverse=FALSE,both.sides=TRUE) {
## Apply re-parameterization from ldetS to X, blockwise.
@@ -710,7 +734,7 @@ Sl.ift <- function(Sl,R,X,y,beta,piv,rp) {
list(bSb=sum(beta*Sb),bSb1=bSb1,bSb2=bSb2,d1b=db,rss =sum(rsd^2),rss1=rss1,rss2=rss2)
} ## end Sl.ift
Sl.iftChol <- function(Sl,XX,R,d,beta,piv) {
Sl.iftChol <- function(Sl,XX,R,d,beta,piv,nt=1) {
## function to obtain derivatives of \hat \beta by implicit differentiation
## and to use these directly to evaluate derivs of b'Sb and the RSS.
## piv contains the pivots from the chol that produced R.
@@ -720,29 +744,48 @@ Sl.iftChol <- function(Sl,XX,R,d,beta,piv) {
nd <- length(Skb)
np <- length(beta)
db <- matrix(0,np,nd)
rss1 <- bSb1 <- rep(0,nd)
rss1 <- bSb1 <- rep(0,nd)
## alternative all in one code - matches loop results, but
## timing close to identical - modified for parallel exec
D <- matrix(unlist(Skb),nrow(R),nd)
bSb1 <- colSums(beta*D)
#D <- D[piv,]/d[piv]
D1 <- .Call(C_mgcv_Rpforwardsolve,R,D[piv,]/d[piv],nt) ## note R transposed internally unlike forwardsolve
db[piv,] <- -.Call(C_mgcv_Rpbacksolve,R,D1,nt)/d[piv]
#db[piv,] <- -backsolve(R,forwardsolve(t(R),D))/d[piv]
## original serial - a bit slow with very large numbers of smoothing
## parameters....
#Rt <- t(R) ## essential to do this first, or t(R) dominates cost in loop
#for (i in 1:nd) { ## compute the first derivatives
# db[piv,i] <- -backsolve(R,forwardsolve(Rt,Skb[[i]][piv]/d[piv]))/d[piv] ## d beta/ d rho_i
# bSb1[i] <- sum(beta*Skb[[i]]) ## d b'Sb / d_rho_i
#}
for (i in 1:nd) { ## compute the first derivatives
db[piv,i] <- -backsolve(R,forwardsolve(t(R),Skb[[i]][piv]/d[piv]))/d[piv] ## d beta/ d rho_i
bSb1[i] <- sum(beta*Skb[[i]]) ## d b'Sb / d_rho_i
}
XX.db <- XX%*%db
## XX.db <- XX%*%db
XX.db <- .Call(C_mgcv_pmmult2,XX,db,0,0,nt)
#XX.db[piv,] <- d[piv]*(t(R)%*%(R%*%(d[piv]*db[piv,]))) ## X'Xdb
S.db <- Sl.mult(Sl,db,k=0)
##Sk.db <- Sl.termMult(Sl,db,full=TRUE) ## Sk.db[[j]][,k] is S_j d beta / d rho_k
rss2 <- bSb2 <- matrix(0,nd,nd)
for (k in 1:nd) { ## second derivative loop
for (j in k:nd) {
rss2[j,k] <- rss2[k,j] <- 2 * sum(db[,j]*XX.db[,k])
bSb2[j,k] <- bSb2[k,j] <- (k==j)*sum(beta*Skb[[k]]) + 2*(sum(db[,k]*(Skb[[j]]+S.db[,j])) +
sum(db[,j]*Skb[[k]]))
}
}
## following loop very slow if large numbers of smoothing parameters...
#rss2 <- bSb2 <- matrix(0,nd,nd)
#for (k in 1:nd) { ## second derivative loop
# for (j in k:nd) {
# rss2[j,k] <- rss2[k,j] <- 2 * sum(db[,j]*XX.db[,k])
# bSb2[j,k] <- bSb2[k,j] <- (k==j)*sum(beta*Skb[[k]]) + 2*(sum(db[,k]*(Skb[[j]]+S.db[,j])) +
# sum(db[,j]*Skb[[k]]))
# }
#}
## rss2 <- 2 * t(db) %*% XX.db
rss2 <- 2 * .Call(C_mgcv_pmmult2,db,XX.db,1,0,nt)
bSb2 <- diag(x=colSums(beta*D),nrow=nd)
## bSb2 <- bSb2 + 2*(t(db)%*%(D+S.db) + t(D)%*%db)
bSb2 <- bSb2 + 2 * (.Call(C_mgcv_pmmult2,db,D+S.db,1,0,nt) + .Call(C_mgcv_pmmult2,D,db,1,0,nt))
list(bSb=sum(beta*Sb),bSb1=bSb1,bSb2=bSb2,
d1b=db ## BUG - this needs transforming as coef - here, or where used
,rss1=rss1,rss2=rss2)
d1b=db ,rss1=rss1,rss2=rss2)
} ## end Sl.iftChol
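A self-contained sketch (random matrices, nothing mgcv-specific) of the
implicit differentiation above: beta solves (X'X + sum_j exp(rho_j)*S_j) beta
= X'y, so d beta / d rho_j = -(X'X + S)^{-1} exp(rho_j) S_j beta:

    set.seed(1)
    n <- 20; p <- 4
    X <- matrix(rnorm(n*p), n, p); y <- rnorm(n)
    S1 <- crossprod(matrix(rnorm(p*p), p, p))  ## one penalty component
    rho <- 0.3
    H <- crossprod(X) + exp(rho)*S1
    beta <- solve(H, crossprod(X, y))
    db <- -solve(H, exp(rho)*S1 %*% beta)      ## d beta / d rho
    eps <- 1e-6                                ## finite difference check...
    beta1 <- solve(crossprod(X) + exp(rho + eps)*S1, crossprod(X, y))
    range(db - (beta1 - beta)/eps)             ## ~ 0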
@@ -777,7 +820,7 @@ Sl.fitChol <- function(Sl,XX,f,rho,yy=0,L=NULL,rho0=0,log.phi=0,phi.fixed=TRUE,n
beta[piv] <- backsolve(R,(forwardsolve(t(R),f[piv]/d[piv])))/d[piv]
## get component derivatives based on IFT...
dift <- Sl.iftChol(ldS$Sl,XX,R,d,beta,piv)
dift <- Sl.iftChol(ldS$Sl,XX,R,d,beta,piv,nt=nt)
## now the derivatives of log|X'X+S|
P <- pbsi(R,nt=nt,copy=TRUE) ## invert R
@@ -1113,7 +1156,7 @@ Sl.Xprep <- function(Sl,X,nt=1) {
} ## end Sl.Xprep
Sl.postproc <- function(Sl,fit,undrop,X0,cov=FALSE,scale = -1,L,nt=nt) {
Sl.postproc <- function(Sl,fit,undrop,X0,cov=FALSE,scale = -1,L,nt=1) {
## reverse the various fitting re-parameterizations.
## X0 is the original model matrix before any re-parameterization
## or parameter dropping. Sl is also the original *before parameter
......
@@ -17,6 +17,7 @@ gam.reparam <- function(rS,lsp,deriv)
## Outputs:
## S -- the total penalty matrix similarity transformed for stability
## rS -- the component square roots, transformed in the same way
## - tcrossprod(rS[[i]]) = rS[[i]] %*% t(rS[[i]]) gives the matrix penalty component.
## Qs -- the orthogonal transformation matrix S = t(Qs)%*%S0%*%Qs, where S0 is the
## untransformed total penalty implied by sp and rS on input
## E -- the square root of the transformed S (obtained in a stable way by pre-conditioning)
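A quick illustration (generic matrices, not mgcv-specific) of the square-root
conventions documented above:

    library(mgcv)
    set.seed(1)
    S <- crossprod(matrix(rnorm(16), 4, 4))     ## a positive definite "penalty"
    E <- t(mroot(S))                            ## E such that E'E = S
    all.equal(crossprod(E), S)                  ## TRUE
    rSi <- matrix(rnorm(12), 4, 3)
    all.equal(tcrossprod(rSi), rSi %*% t(rSi))  ## TRUE, as the note above says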
@@ -62,66 +63,6 @@ gam.reparam <- function(rS,lsp,deriv)
} ## gam.reparam
get.Eb <- function(rS,rank)
## temporary routine to get balanced sqrt of total penalty
## should eventually be moved to estimate.gam, or gam.setup,
## as it's sp independent, but that means redoing the gam.fit3 call list,
## which should only be done after method is tested
{ q <- nrow(rS[[1]])
S <- matrix(0,q,q)
for (i in 1:length(rS)) {
Si <- tcrossprod(rS[[i]]) ## rS[[i]]%*%t(rS[[i]])
S <- S + Si/sqrt(sum(Si^2))
}
t(mroot(S,rank=rank)) ## E such that E'E = S
} ## get.Eb
huberp <- function(wp,dof,k=1.5,tol=.Machine$double.eps^.5) {
## function to obtain huber estimate of scale from Pearson residuals, simplified
## from 'hubers' from MASS package
s0 <- mad(wp) ## initial scale estimate
th <- 2*pnorm(k) - 1
beta <- th + k^2 * (1 - th) - 2 * k * dnorm(k)
for (i in 1:50) {
r <- pmin(pmax(wp,-k*s0),k*s0)
ss <- sum(r^2)/dof
s1 <- sqrt(ss/beta)
if (abs(s1-s0)<tol*s0) break
s0 <- s1
}
if (i==50) warning("Huber scale estimate not converged")
s1^2
} ## huberp
gam.scale <- function(wp,wd,dof,extra=0) {
## obtain estimates of the scale parameter, using the weighted Pearson and
## deviance residuals and the residual effective degrees of freedom.
## Problem is that Pearson is unbiased, but potentially unstable (e.g.
## when count is 1 but mean is tiny, so that pearson residual is enormous,
## although deviance residual is much less extreme).
pearson <- (sum(wp^2)+extra)/dof
deviance <- (sum(wd^2)+extra)/dof
if (extra==0) robust <- huberp(wp,dof) else {
## now scale deviance residuals to have magnitude similar
## to pearson and compute new estimator.
kd <- wd
ind <- wd > 0
kd[ind] <- wd[ind]*median(wp[ind]/wd[ind])
ind <- wd < 0
kd[ind] <- wd[ind]*median(wp[ind]/wd[ind])
robust <- (sum(kd^2)+extra)/dof
## force estimate to lie between deviance and pearson estimators
if (pearson > deviance) {
if (robust < deviance) robust <- deviance
if (robust > pearson) robust <- pearson
} else {
if (robust > deviance) robust <- deviance
if (robust < pearson) robust <- pearson
}
}
list(pearson=pearson,deviance=deviance,robust=robust)
}
gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
weights = rep(1, nobs), start = NULL, etastart = NULL,
@@ -651,14 +592,6 @@ gam.fit3 <- function (x, y, sp, Eb,UrS=list(),
}
trA <- oo$trA;
# wpr <- (y-mu) *sqrt(weights/family$variance(mu)) ## weighted pearson residuals
# se <- gam.scale(wpr,wdr,n.true-trA,dev.extra) ## get scale estimates
# pearson.warning <- NULL
# if (control$scale.est=="pearson") {
# scale.est <- se$pearson
# if (scale.est > 4 * se$robust) pearson.warning <- TRUE
# } else scale.est <- if (control$scale.est=="deviance") se$deviance else se$robust
if (control$scale.est%in%c("pearson","fletcher","Pearson","Fletcher")) {
pearson <- sum(weights*(y-mu)^2/family$variance(mu))
scale.est <- (pearson+dev.extra)/(n.true-trA)
@@ -941,9 +874,6 @@ gam.fit3.post.proc <- function(X,L,lsp0,S,off,object) {
edf <- diag(F) ## effective degrees of freedom
edf1 <- 2*edf - rowSums(t(F)*F) ## alternative
## check on plausibility of scale (estimate)
##if (object$scale.estimated&&!is.null(object$pearson.warning)) warning("Pearson scale estimate maybe unstable. See ?gam.scale.")
## edf <- rowSums(PKt*t(sqrt(object$weights)*X))
## Ve <- PKt%*%t(PKt)*object$scale ## frequentist cov
Ve <- F%*%Vb ## not quite as stable as above, but quicker
@@ -1371,6 +1301,14 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
okc <- FALSE
eps <- 1e-4
deriv <- 2
if (okc) { ## optional call to fitting to facilitate debugging
trial.der <- 2 ## can reset if derivs not wanted
b <- gam.fit3(x=X, y=y, sp=L%*%lsp+lsp0,Eb=Eb,UrS=UrS,
offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=trial.der,
control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start,
mustart=mustart,scoreType=scoreType,null.coef=null.coef,
pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...)
}
deriv.check(x=X, y=y, sp=L%*%lsp+lsp0, Eb=Eb,UrS=UrS,
offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv,
control=control,gamma=gamma,scale=scale,
@@ -1407,6 +1345,8 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
## get the trial step ...
eh <- eigen(hess1,symmetric=TRUE)
d <- eh$values;U <- eh$vectors
indef <- (sum(-d > abs(d[1])*.Machine$double.eps^.5)>0) ## indefinite problem
## set eigen-values to their absolute value - heuristically appealing
## as it avoids very long steps being proposed for indefinite components,
## unlike setting -ve e.v.s to very small +ve constant...
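A toy contrast (generic Hessian, not newton()'s internals) between the
absolute-value heuristic above and flooring negative eigenvalues:

    hess <- diag(c(2, -0.5))  ## indefinite Hessian
    grad <- c(1, 1)
    eh <- eigen(hess, symmetric = TRUE)
    d1 <- abs(eh$values)         ## |e.v.|: step components stay moderate
    -eh$vectors %*% ((t(eh$vectors) %*% grad)/d1)  ## (-0.5, -2)
    d2 <- pmax(eh$values, 1e-8)  ## flooring: near-zero e.v. blows the step up
    -eh$vectors %*% ((t(eh$vectors) %*% grad)/d2)  ## (-0.5, -1e8)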
@@ -1427,13 +1367,11 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
ms <- max(abs(Nstep)) ## note smaller permitted step if !pdef
mns <- maxNstep
# mns <- if (pdef) maxNstep else maxNstep/3 ## more cautious if not pdef
# if (!all(Nstep*Sstep >= 0)) mns <- mns/2 ## bit more cautious if directions differ
if (ms>maxNstep) Nstep <- mns * Nstep/ms
sd.unused <- TRUE ## steepest descent direction not yet tried
## try the step ...
if (sp.trace) cat(lsp,"\n")
@@ -1644,7 +1582,7 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
score.hist[i] <- score
## test for convergence
converged <- TRUE
converged <- !indef ## not converged if indefinite
if (reml) score.scale <- abs(log(b$scale.est)) + abs(score) else
score.scale <- abs(b$scale.est) + abs(score)
grad2 <- diag(hess)
@@ -2481,6 +2419,7 @@ fix.family.var <- function(fam)
if (!inherits(fam,"family")) stop("fam not a family object")
if (!is.null(fam$dvar)&&!is.null(fam$d2var)&&!is.null(fam$d3var)) return(fam)
family <- fam$family
fam$scale <- -1
if (family=="gaussian") {
fam$d3var <- fam$d2var <- fam$dvar <- function(mu) rep.int(0,length(mu))
return(fam)
@@ -2488,12 +2427,14 @@ fix.family.var <- function(fam)
if (family=="poisson"||family=="quasipoisson") {
fam$dvar <- function(mu) rep.int(1,length(mu))
fam$d3var <- fam$d2var <- function(mu) rep.int(0,length(mu))
if (family=="poisson") fam$scale <- 1
return(fam)
}
if (family=="binomial"||family=="quasibinomial") {
fam$dvar <- function(mu) 1-2*mu
fam$d2var <- function(mu) rep.int(-2,length(mu))
fam$d3var <- function(mu) rep.int(0,length(mu))
if (family=="binomial") fam$scale <- 1
return(fam)
}
if (family=="Gamma") {
......
@@ -59,7 +59,7 @@ dDeta <- function(y,mu,wt,theta,fam,deriv=0) {
d$Deta3th <- ig13*r$Dmu3th - 3 *r$Dmu2th*g2g*ig12 + r$Dmuth*(3*g2g^2-g3g)*ig1
}
d
} ## dDmu
} ## dDeta
fetad.test <- function(y,mu,wt,theta,fam,eps = 1e-7,plot=TRUE) {
## test family derivatives w.r.t. eta
@@ -325,17 +325,19 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
coefold <- null.coef
conv <- boundary <- FALSE
dd <- dDeta(y,mu,weights,theta,family,0) ## derivatives of deviance w.r.t. eta
w <- dd$Deta2 * .5;
wz <- w*(eta-offset) - .5*dd$Deta
z <- (eta-offset) - dd$Deta.Deta2
good <- is.finite(z)&is.finite(w)
for (