Commit f2009880 authored by Dirk Eddelbuettel

Import Debian changes 1.8-19-1

mgcv (1.8-19-1) unstable; urgency=medium

  * New upstream release
  
  * debian/control: Set Standards-Version: to current version 
  * debian/compat: Set to 9

  * debian/README.source: Added
parents b409bbef e0667888
** denotes quite substantial/important changes
*** denotes really big changes
Currently deprecated and liable to be removed:
- single penalty tensor product smooths.
- p.type!=0 in summary.gam.
Currently deprecated and liable to be removed:
- gam performance iteration (1.8-19, Sept 2017)
1.8-19
** bam() now accepts extended families (e.g. nb, tw, ocat etc); see the usage
sketch after this list.
* cox.ph now allows stratification (i.e. baseline hazard can differ between
groups).
* Example code for matched case control in ?cox.ph was just plain wrong. Now
fixed. Thanks to Patrick Farrell.
* bam(...,discrete=TRUE) tolerance for judging whether smoothing parameters
are on boundary was far too low, so that sps could become so large that
numerical instability set in. Fixed. Thanks to Paul Rosenfield.
* p.type!=0 removed in summary.gam (previously deprecated)
* single penalty tensor product smooths removed (previously deprecated).
* gam(...,optimizer="perf") deprecated.
* extra divergence check added to bam's default gam fitting (similar to the
check in the discrete method).
* preinitialize and postproc components of extended families are now functions,
not expressions.
* coefficient divergence check was missing in bam(...,discrete=TRUE) release
code - now fixed.
* gaulss family link derivatives modified to avoid overflow. Thanks to
Kristen Beck for reporting the problem.
* redundant 'n' argument removed from extended family 'ls' functions.
* Convergence checking can now declare step failure earlier in fast.REML.fit. If the
trial step gives no improvement and is equal to the previous best (to within a
tolerance), then terminate with step failure after a few step halvings if the
situation persists. Thanks to Zheyuan Li for reporting the problem.
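A minimal usage sketch of the new extended-family support in bam(), using simulated data; the particular family (nb), the gamSim() simulation and the discrete=TRUE choice are illustrative assumptions, not taken from the changelog itself:
library(mgcv)
set.seed(1)
## simulate Poisson-like count data with four smooth covariate effects...
dat <- gamSim(1, n = 2000, dist = "poisson", scale = 0.2)
## negative binomial family with theta estimated, fitted by the discrete method...
b <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), family = nb(),
         data = dat, discrete = TRUE)
summary(b)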
1.8-18
......
Package: mgcv
Version: 1.8-18
Version: 1.8-19
Author: Simon Wood <simon.wood@r-project.org>
Maintainer: Simon Wood <simon.wood@r-project.org>
Title: Mixed GAM Computation Vehicle with Automatic Smoothness
Estimation
Description: Generalized additive (mixed) models, some of their extensions and
other generalized ridge regression with multiple smoothing
parameter estimation by (REstricted) Marginal Likelihood,
parameter estimation by (Restricted) Marginal Likelihood,
Generalized Cross Validation and similar. Includes a gam()
function, a wide variety of smoothers, JAGS support and
distributions beyond the exponential family.
......@@ -18,6 +18,6 @@ LazyLoad: yes
ByteCompile: yes
License: GPL (>= 2)
NeedsCompilation: yes
Packaged: 2017-07-26 15:00:29 UTC; sw283
Packaged: 2017-08-29 08:25:01 UTC; sw283
Repository: CRAN
Date/Publication: 2017-07-28 07:28:50 UTC
Date/Publication: 2017-08-29 11:55:09 UTC
......@@ -97,6 +97,7 @@ importMethodsFrom(Matrix,t,colMeans,colSums,chol,solve,lu,expand)
importFrom(Matrix,Diagonal,sparseMatrix,Matrix)
importFrom(methods,cbind2,as)
importFrom(stats,weighted.mean)
importFrom(stats,optimize)
S3method(anova, gam)
S3method(influence, gam)
......
## code for fast REML computation. key feature is that first and
## second derivatives come at no increase in leading order
## computational cost, relative to evaluation!
## (c) Simon N. Wood, 2010-2016
## (c) Simon N. Wood, 2010-2017
Sl.setup <- function(G) {
## Sets up a list representing a block diagonal penalty matrix.
......@@ -227,6 +227,28 @@ Sl.setup <- function(G) {
Sl ## the penalty list
} ## end of Sl.setup
Sl.Sb <- function(Sl,rho,beta) {
## computes S %*% beta where S is total penalty matrix defined by Sl and rho,
## the log smoothing parameters. Assumes initial re-parameterization has taken
## place, so single penalties are multiples of identity and uses S for
## multi-S blocks. Logic is identical to Sl.addS.
k <- 1
a <- beta * 0
for (b in 1:length(Sl)) {
ind <- (Sl[[b]]$start:Sl[[b]]$stop)[Sl[[b]]$ind]
if (length(Sl[[b]]$S)==1) { ## singleton - multiple of identity
a[ind] <- a[ind] + beta[ind] * exp(rho[k])
k <- k + 1
} else { ## multi-S block
for (j in 1:length(Sl[[b]]$S)) {
a[ind] <- a[ind] + exp(rho[k]) * (Sl[[b]]$S[[j]] %*% beta[ind])
k <- k + 1
}
}
}
a
} ## Sl.Sb
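## Illustrative sketch only (not part of mgcv): the dense-matrix equivalent of
## what Sl.Sb computes, i.e. S %*% beta with S = sum_j exp(rho_j) S_j assembled
## block by block. The block sizes, penalties and coefficients below are
## hypothetical toy values.
S1  <- diag(3)                             ## singleton block: multiple of identity
S2a <- crossprod(matrix(rnorm(4), 2, 2))   ## multi-S block, first penalty
S2b <- crossprod(matrix(rnorm(4), 2, 2))   ## multi-S block, second penalty
rho  <- c(0.3, -1, 2)                      ## log smoothing parameters
beta <- rnorm(5)
S <- matrix(0, 5, 5)                       ## assemble the total penalty...
S[1:3, 1:3] <- exp(rho[1]) * S1
S[4:5, 4:5] <- exp(rho[2]) * S2a + exp(rho[3]) * S2b
a <- S %*% beta                            ## what Sl.Sb returns, block by block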
Sl.initial.repara <- function(Sl,X,inverse=FALSE,both.sides=TRUE,cov=TRUE,nt=1) {
## Routine to apply initial Sl re-parameterization to model matrix X,
## or, if inverse==TRUE, to apply inverse re-para to parameter vector
......@@ -748,7 +770,7 @@ Sl.iftChol <- function(Sl,XX,R,d,beta,piv,nt=1) {
## alternative all in one code - matches loop results, but
## timing close to identical - modified for parallel exec
D <- matrix(unlist(Skb),nrow(R),nd)
D <- matrix(unlist(Skb),nrow(XX),nd)
bSb1 <- colSums(beta*D)
#D <- D[piv,]/d[piv]
D1 <- .Call(C_mgcv_Rpforwardsolve,R,D[piv,]/d[piv],nt) ## note R transposed internally unlike forwardsolve
......@@ -794,14 +816,14 @@ Sl.fitChol <- function(Sl,XX,f,rho,yy=0,L=NULL,rho0=0,log.phi=0,phi.fixed=TRUE,n
## with penalty defined by Sl and rho, and evaluates a REML Newton step, the REML
## gradient and the estimated coefs bhat. If phi.fixed=FALSE then we need
## yy = y'Wy in order to get derivatives w.r.t. phi.
tol <- as.numeric(tol)
rho <- if (is.null(L)) rho + rho0 else L%*%rho + rho0
if (length(rho)<length(rho0)) rho <- rho0 ## ncol(L)==0 or length(rho)==0
## get log|S|_+ without stability transform...
fixed <- rep(FALSE,length(rho))
ldS <- ldetS(Sl,rho,fixed,np=ncol(XX),root=FALSE,repara=FALSE,nt=nt)
## now the Choleki factor of the penalized Hessian...
## now the Cholesky factor of the penalized Hessian...
#XXp <- XX+crossprod(ldS$E) ## penalized Hessian
XXp <- Sl.addS(Sl,XX,rho)
......@@ -819,7 +841,7 @@ Sl.fitChol <- function(Sl,XX,f,rho,yy=0,L=NULL,rho0=0,log.phi=0,phi.fixed=TRUE,n
beta <- rep(0,p)
beta[piv] <- backsolve(R,(forwardsolve(t(R),f[piv]/d[piv])))/d[piv]
## get component derivatives based on IFT...
## get component derivatives based on IFT (noting that ldS$Sl has s.p.s updated to current)
dift <- Sl.iftChol(ldS$Sl,XX,R,d,beta,piv,nt=nt)
## now the derivatives of log|X'X+S|
......@@ -1018,15 +1040,25 @@ fast.REML.fit <- function(Sl,X,y,rho,L=NULL,rho.0=NULL,log.phi=0,phi.fixed=TRUE,
rho1 <- L%*%(rho + step)+rho.0; if (!phi.fixed) log.phi <- rho1[nr+1]
trial <- Sl.fit(Sl,X,y,rho1[1:nr],fixed,log.phi,phi.fixed,rss.extra,nobs,Mp,nt=nt)
k <- 0
while (trial$reml>best$reml && k<35) { ## step half until improvement
not.moved <- 0 ## count number of consecutive steps of essentially no change from best
while (trial$reml>best$reml) { ## step half until improvement or failure
## idea with the following is to count the number of consecutive step halvings
## without a numerically significant change from best$reml, since
## this is an early indicator of step failure.
if (trial$reml-best$reml < conv.tol*reml.scale) not.moved <- not.moved + 1 else not.moved <- 0
if (k==25||sum(rho != rho + step)==0||not.moved>3) {
step.failed <- TRUE
break
}
step <- step/2;k <- k + 1
rho1 <- L%*%(rho + step)+rho.0; if (!phi.fixed) log.phi <- rho1[nr+1]
trial <- Sl.fit(Sl,X,y,rho1[1:nr],fixed,log.phi,phi.fixed,rss.extra,nobs,Mp,nt=nt)
}
if ((k==35 && trial$reml>best$reml)||(sum(rho != rho + step)==0)) { ## step has failed
step.failed <- TRUE
break ## can get no further
}
if (step.failed) break ## can get no further
#if ((k==35 && trial$reml>best$reml)||(sum(rho != rho + step)==0)) { ## step has failed
# step.failed <- TRUE
# break ## can get no further
#}
## At this stage the step has been successful.
## Need to test for convergence...
converged <- TRUE
......
......@@ -1346,7 +1346,9 @@ newton <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights,
eh <- eigen(hess1,symmetric=TRUE)
d <- eh$values;U <- eh$vectors
indef <- (sum(-d > abs(d[1])*.Machine$double.eps^.5)>0) ## indefinite problem
## need a different test if there is only one smoothing parameter,
## otherwise infinite sp can count as always indefinite...
if (indef && length(d)==1) indef <- d < -score.scale * .Machine$double.eps^.5
## set eigen-values to their absolute value - heuristically appealing
## as it avoids very long steps being proposed for indefinite components,
## unlike setting -ve e.v.s to very small +ve constant...
......
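A small self-contained sketch of the eigenvalue test added above, applied to a hypothetical 2 x 2 Hessian; score.scale is a stand-in value, since the real quantity comes from the surrounding newton() code:
hess1 <- matrix(c(2, 0, 0, -1e-4), 2, 2)      ## hypothetical near-singular Hessian
eh <- eigen(hess1, symmetric = TRUE)
d <- eh$values                                ## eigenvalues, largest first
## indefinite if any eigenvalue is negative beyond a tolerance scaled by the largest...
indef <- sum(-d > abs(d[1]) * .Machine$double.eps^.5) > 0
score.scale <- 1                              ## stand-in for the REML score scale
## with a single smoothing parameter, retest against the score scale instead,
## so an effectively infinite sp does not always register as indefinite...
if (indef && length(d) == 1) indef <- d < -score.scale * .Machine$double.eps^.5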
......@@ -602,7 +602,7 @@ gam.fit4 <- function(x, y, sp, Eb,UrS=list(),
D2 <- matrix(oo$D2,ntot,ntot); ldet2 <- matrix(oo$ldet2,ntot,ntot)
bSb2 <- matrix(oo$P2,ntot,ntot)
## compute the REML score...
ls <- family$ls(y,weights,n,theta,scale)
ls <- family$ls(y,weights,theta,scale)
nt <- length(theta)
lsth1 <- ls$lsth1[1:nt];
lsth2 <- as.matrix(ls$lsth2)[1:nt,1:nt] ## exclude any derivs w.r.t log scale here
......@@ -771,6 +771,7 @@ gam.fit5 <- function(x,y,lsp,Sl,weights=NULL,offset=NULL,deriv=2,family,
grad <- ll$lb - St%*%coef
Hp <- -ll$lbb+St
D <- diag(Hp)
if (sum(!is.finite(D))>0) stop("non finite values in Hessian")
indefinite <- FALSE
if (sum(D <= 0)) { ## Hessian indefinite, for sure
D <- rep(1,ncol(Hp))
......
......@@ -523,11 +523,11 @@ gaulss <- function(link=list("identity","logb"),b=0.01) {
stats[[2]]$mu.eta <- eval(parse(text=
paste("function(eta) { ee <- exp(eta); -ee/(ee +",b,")^2 }")))
stats[[2]]$d2link <- eval(parse(text=
paste("function(mu) { mub <- 1 - mu *",b,";(2*mub-1)/(mub*mu)^2}" )))
paste("function(mu) { mub <- pmax(1 - mu *",b,",.Machine$double.eps);(2*mub-1)/(mub*mu)^2}" )))
stats[[2]]$d3link <- eval(parse(text=
paste("function(mu) { mub <- 1 - mu *",b,";((1-mub)*mub*6-2)/(mub*mu)^3}" )))
paste("function(mu) { mub <- pmax(1 - mu *",b,",.Machine$double.eps);((1-mub)*mub*6-2)/(mub*mu)^3}" )))
stats[[2]]$d4link <- eval(parse(text=
paste("function(mu) { mub <- 1 - mu *",b,";(((24*mub-36)*mub+24)*mub-6)/(mub*mu)^4}")))
paste("function(mu) { mub <- pmax(1 - mu *",b,",.Machine$double.eps);(((24*mub-36)*mub+24)*mub-6)/(mub*mu)^4}")))
} else stop(link[[2]]," link not available for precision parameter of gaulss")
residuals <- function(object,type=c("deviance","pearson","response")) {
......
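A minimal sketch of why the clamping above matters, using b = 0.01 (the default for the logb link) and a mean value at which 1 - mu*b hits zero; the function names are local stand-ins for the generated link derivatives, not mgcv objects:
b <- 0.01
d2link_old <- function(mu) { mub <- 1 - mu * b; (2 * mub - 1) / (mub * mu)^2 }
d2link_new <- function(mu) {
  mub <- pmax(1 - mu * b, .Machine$double.eps)  ## clamp mub away from zero
  (2 * mub - 1) / (mub * mu)^2
}
d2link_old(100)   ## -Inf: mub is exactly zero, so the derivative overflows
d2link_new(100)   ## very large negative, but finite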
......@@ -357,7 +357,7 @@ ti <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,np=TRUE,xt=NULL,id=NUL
object
} ## ti
te <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,mp=TRUE,np=TRUE,xt=NULL,id=NULL,sp=NULL,pc=NULL)
te <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,np=TRUE,xt=NULL,id=NULL,sp=NULL,pc=NULL)
# function for use in gam formulae to specify a tensor product smooth term.
# e.g. te(x0,x1,x2,k=c(5,4,4),bs=c("tp","cr","cr"),m=c(1,1,2),by=x3) specifies a rank 80 tensor
# product spline. The first basis is rank 5, t.p.r.s. basis penalty order 1, and the next 2 bases
......@@ -371,7 +371,6 @@ te <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,mp=TRUE,np=TRUE,xt=NUL
# * by - the by variable name
# * fx - array indicating which margins should be treated as fixed (i.e unpenalized).
# * label - label for this term
# * mp - TRUE to use a penalty per dimension, FALSE to use a single penalty
{ vars <- as.list(substitute(list(...)))[-1] # gets terms to be smoothed without evaluation
dim <- length(vars) # dimension of smoother
by.var <- deparse(substitute(by),backtick=TRUE) #getting the name of the by variable
......@@ -449,7 +448,7 @@ te <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,mp=TRUE,np=TRUE,xt=NUL
j<-j1+1
}
# assemble term.label
if (mp) mp <- TRUE else mp <- FALSE
#if (mp) mp <- TRUE else mp <- FALSE
if (np) np <- TRUE else np <- FALSE
full.call<-paste("te(",term[1],sep="")
if (dim>1) for (i in 2:dim) full.call<-paste(full.call,",",term[i],sep="")
......@@ -461,8 +460,8 @@ te <- function(..., k=NA,bs="cr",m=NA,d=NA,by=NA,fx=FALSE,mp=TRUE,np=TRUE,xt=NUL
}
id <- as.character(id)
}
ret<-list(margin=margin,term=term,by=by.var,fx=fx,label=label,dim=dim,mp=mp,np=np,
id=id,sp=sp,inter=FALSE)
ret<-list(margin=margin,term=term,by=by.var,fx=fx,label=label,dim=dim,#mp=mp,
np=np,id=id,sp=sp,inter=FALSE)
if (!is.null(pc)) {
if (length(pc)<d) stop("supply a value for each variable for a point constraint")
if (!is.list(pc)) pc <- as.list(pc)
......@@ -799,25 +798,25 @@ smooth.construct.tensor.smooth.spec <- function(object,data,knots) {
max.rank <- prod(d)
r <- max.rank*r/d # penalty ranks
X <- tensor.prod.model.matrix(Xm)
if (object$mp) { # multiple penalties
S <- tensor.prod.penalties(Sm)
for (i in m:1) if (object$fx[i]) {
# if (object$mp) { # multiple penalties
S <- tensor.prod.penalties(Sm)
for (i in m:1) if (object$fx[i]) {
S[[i]] <- NULL # remove penalties for un-penalized margins
r <- r[-i] # remove corresponding rank from list
}
} else { # single penalty
warning("single penalty tensor product smooths are deprecated and likely to be removed soon")
S <- Sm[[1]];r <- object$margin[[i]]$rank
if (m>1) for (i in 2:m)
{ S <- S%x%Sm[[i]]
r <- r*object$margin[[i]]$rank
}
if (sum(object$fx)==m)
{ S <- list();object$fixed=TRUE } else
{ S <-list(S);object$fixed=FALSE }
nr <- max.rank-r
object$bs.dim <- max.rank
}
# } else { # single penalty
# warning("single penalty tensor product smooths are deprecated and likely to be removed soon")
# S <- Sm[[1]];r <- object$margin[[i]]$rank
# if (m>1) for (i in 2:m)
# { S <- S%x%Sm[[i]]
# r <- r*object$margin[[i]]$rank
# }
# if (sum(object$fx)==m)
# { S <- list();object$fixed=TRUE } else
# { S <-list(S);object$fixed=FALSE }
# nr <- max.rank-r
# object$bs.dim <- max.rank
# }
if (!is.null(object$margin[[1]]$xt$dropu)&&object$margin[[1]]$xt$dropu) {
ind <- which(colSums(abs(X))!=0)
......
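A short sketch of the multiple-penalty construction retained above, using hypothetical difference penalties for two margins and assuming the exported helper tensor.prod.penalties (documented alongside tensor.prod.model.matrix):
library(mgcv)
S1 <- crossprod(diff(diag(4)))   ## 4 x 4 difference penalty for margin 1
S2 <- crossprod(diff(diag(3)))   ## 3 x 3 difference penalty for margin 2
S  <- tensor.prod.penalties(list(S1, S2))
length(S)        ## 2: one penalty per margin for the 12-coefficient tensor product
sapply(S, dim)   ## each penalty is 12 x 12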
Explanation for binary files inside source package according to
http://lists.debian.org/debian-devel/2013/09/msg00332.html
Files: inst/external/CAex_slots.rda
Documentation: man/CAex.Rd
-> "Difficult" Eigen factorization matrix from contributed email
covering 180 days starting with Jan 2, 2007.
Files: inst/external/KNex_slots.rda
Documentation: man/KNex.Rd
-> Koenker and Ng example from SparseM package
Files: inst/external/USCounties_slots.rda
Documentation: man/KNex.Rd
-> US Counties data from Ord (1975)
-- Dirk Eddelbuettel <edd@debian.org>, Thu, 17 Aug 2017 20:59:21 -0500
Explanation for binary files inside source package according to
http://lists.debian.org/debian-devel/2013/09/msg00332.html
Files: data/columb.polys.rda
Documentation: man/columb.Rd
-> By district crime data from Columbus OH. Useful for illustrating
use of simple Markov Random Field smoothers.
The data are adapted from the 'columbus' example in the 'spdep'
package, where the original source is given as:
Anselin, Luc. 1988. Spatial econometrics: methods and
models. Dordrecht: Kluwer Academic, Table 12.1 p. 189.
Files: data/columb.polys.rda
Documentation: man/columb.Rd
-> By district crime data from Columbus OH, together with polygons
describing district shape. Useful for illustrating use of simple
Markov Random Field smoothers.
The data are adapted from the 'columbus' example in the 'spdep'
package, where the original source is given as:
Anselin, Luc. 1988. Spatial econometrics: methods and
models. Dordrecht: Kluwer Academic, Table 12.1 p. 189.
-- Dirk Eddelbuettel <edd@debian.org>, Tue, 29 Aug 2017 22:00:34 -0500
mgcv (1.8-19-1) unstable; urgency=medium
* New upstream release
* debian/control: Set Standards-Version: to current version
* debian/compat: Set to 9
* debian/README.source: Added
-- Dirk Eddelbuettel <edd@debian.org> Tue, 29 Aug 2017 21:49:26 -0500
mgcv (1.8-18-1) unstable; urgency=medium
* New upstream release
......
......@@ -3,7 +3,7 @@ Section: gnu-r
Priority: optional
Maintainer: Dirk Eddelbuettel <edd@debian.org>
Build-Depends: debhelper (>= 7.0.0), r-base-dev (>= 3.4.1), r-cran-nlme, r-cran-matrix, cdbs
Standards-Version: 3.9.8
Standards-Version: 4.0.0
Package: r-cran-mgcv
Architecture: any
......
citHeader("2011 for generalized additive model method;
citHeader("2011 for generalized additive model method; 2016 for beyond exponential family;
2004 for strictly additive GCV based model method and basics of gamm;
2006 for overview; 2003 for thin plate regression splines;
2016 for jagam.")
2017 for overview; 2003 for thin plate regression splines.")
bibentry(
bibtype="Article",
......@@ -17,6 +16,17 @@ likelihood estimation of semiparametric generalized linear models",
and marginal likelihood estimation of semiparametric generalized linear
models. Journal of the Royal Statistical Society (B) 73(1):3-36" )
bibentry(
bibtype="Article",
title= "Smoothing parameter and model selection for general smooth models (with discussion)",
author= "S.N. Wood and N. and Pya and B. S{\"a}fken",
journal= "Journal of the American Statistical Association",
year= "2016",
pages= "1548-1575",
volume= "111",
textVersion="Wood S.N., N. Pya and B. Saefken (2016) Smoothing parameter and model selection for general smooth models (with discussion). Journal of the American Statistical Association 111:1548-1575."
)
bibentry(
bibtype="Article",
title= "Stable and efficient multiple smoothing parameter estimation for
......@@ -32,14 +42,17 @@ bibentry(
American Statistical Association. 99:673-686."
)
bibentry(
bibtype="Book",
title="Generalized Additive Models: An Introduction with R",
year="2006",
year="2017",
author="S.N Wood",
edition="2",
publisher="Chapman and Hall/CRC",
textVersion="Wood, S.N. (2006) Generalized Additive Models: An
Introduction with R. Chapman and Hall/CRC. "
textVersion="Wood, S.N. (2017) Generalized Additive Models: An
Introduction with R (2nd edition). Chapman and Hall/CRC. "
)
bibentry(
......@@ -55,19 +68,3 @@ bibentry(
Journal of the Royal Statistical Society (B) 65(1):95-114." )
bibentry(bibtype = "Article",
title = "Just Another Gibbs Additive Modeler: Interfacing {JAGS} and {mgcv}",
author = "S. N. Wood",
journal = "Journal of Statistical Software",
year = "2016",
volume = "75",
number = "7",
pages = "1--15",
doi = "10.18637/jss.v075.i07",
textVersion =
paste("Simon N. Wood (2016).",
"Just Another Gibbs Additive Modeler: Interfacing JAGS and mgcv.",
"Journal of Statistical Software, 75(7), 1-15.",
"doi:10.18637/jss.v075.i07")
)
......@@ -2,7 +2,7 @@
\alias{betar}
%- Also NEED an `\alias' for EACH other topic documented here.
\title{GAM beta regression family}
\description{Family for use with \code{\link{gam}}, implementing regression for beta distributed data on (0,1).
\description{Family for use with \code{\link{gam}} or \code{\link{bam}}, implementing regression for beta distributed data on (0,1).
A linear predictor controls the mean, \eqn{\mu}{mu} of the beta distribution, while the variance is then
\eqn{\mu(1-\mu)/(1+\phi)}{mu(1-mu)/(1+phi)}, with parameter \eqn{\phi}{phi} being estimated during
fitting, alongside the smoothing parameters.
......@@ -31,7 +31,7 @@ keep the log likelihood bounded for all parameter values. Any data exactly at 0
%- maybe also `usage' for other objects documented here.
\author{ Natalya Pya (nyp20@bath.ac.uk) and Simon Wood (s.wood@r-project.org)
\author{ Natalya Pya (nat.pya@gmail.com) and Simon Wood (s.wood@r-project.org)
}
\section{WARNINGS}{
......
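A brief simulated-data sketch of betar() used with bam(), matching the wording change above; the simulated beta-distributed response and parameter values are hypothetical, not taken from the help file:
library(mgcv)
set.seed(3)
n <- 2000; x <- runif(n)
mu  <- binomial()$linkinv(-1 + 3 * x)        ## true mean on (0,1)
phi <- 5
y <- rbeta(n, mu * phi, (1 - mu) * phi)      ## beta distributed response
b <- bam(y ~ s(x), family = betar(link = "logit"), data = data.frame(x = x, y = y))
summary(b)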
......@@ -44,7 +44,7 @@ corresponding \code{\link{Predict.matrix}} method function: see the example code
\references{
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
Wood S.N. (2017) Generalized Additive Models: An Introduction with R (2nd edition). Chapman
and Hall/CRC Press.
}
......
......@@ -45,7 +45,7 @@ to predict from a fitted \code{\link{gam}} model. See \code{\link{Predict.matrix
}
\references{
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
Wood S.N. (2017) Generalized Additive Models: An Introduction with R (2nd edition). Chapman
and Hall/CRC Press.
}
......
File mode changed from 100644 to 100755
......@@ -9,7 +9,7 @@ full likelihood is desirable. \code{Tweedie} is for use with fixed \code{p}. \co
is to be estimated during fitting. For fixed \code{p} between 1 and 2 the Tweedie is an exponential family
distribution with variance given by the mean to the power \code{p}.
\code{tw} is only useable with \code{\link{gam}}, not \code{bam} or \code{gamm}. \code{Tweedie} works with all three.
\code{tw} is only useable with \code{\link{gam}} and \code{\link{bam}} but not \code{gamm}. \code{Tweedie} works with all three.
}
\usage{
......
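A short sketch of tw() with bam(), reflecting the change above; the simulation follows the rTweedie-based pattern used in the mgcv examples, but the exact calls here are illustrative:
library(mgcv)
set.seed(4)
dat <- gamSim(1, n = 2000, dist = "poisson", scale = 0.2)
dat$y <- rTweedie(exp(dat$f), p = 1.3, phi = 0.5)  ## Tweedie response, 1 < p < 2
b <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), family = tw(), data = dat)
summary(b)   ## the estimated power parameter is reported alongside the family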
......@@ -12,7 +12,7 @@ to zero. Models to be compared should be fitted to the same data using the same
}
\usage{
\method{anova}{gam}(object, ..., dispersion = NULL, test = NULL,
freq = FALSE,p.type=0)
freq = FALSE)
\method{print}{anova.gam}(x, digits = max(3, getOption("digits") - 3),...)
}
%- maybe also `usage' for other objects documented here.
......@@ -24,8 +24,6 @@ to zero. Models to be compared should be fitted to the same data using the same
\code{"Chisq"}, \code{"F"} or \code{"Cp"}. }
\item{freq}{whether to use frequentist or Bayesian approximations for parametric term
p-values. See \code{\link{summary.gam}} for details.}
\item{p.type}{selects exact test statistic to use for single smooth term p-values. See
\code{\link{summary.gam}} for details.}
\item{digits}{number of digits to use when printing output.}
}
\details{ If more than one fitted model is provided then \code{anova.glm} is
......
......@@ -5,14 +5,15 @@
\title{Generalized additive models for very large datasets}
\description{ Fits a generalized additive model (GAM) to a very large
data set, the term `GAM' being taken to include any quadratically penalized GLM.
data set, the term `GAM' being taken to include any quadratically penalized GLM (the extended families
listed in \code{\link{family.mgcv}} can also be used).
The degree of smoothness of model terms is estimated as part of
fitting. In use the function is much like \code{\link{gam}}, except that the numerical methods
are designed for datasets containing upwards of several tens of thousands of data (see Wood, Goude and Shaw, 2015). The advantage
of \code{bam} is much lower memory footprint than \code{\link{gam}}, but it can also be much faster,
for large datasets. \code{bam} can also compute on a cluster set up by the \link[parallel]{parallel} package.
An alternative fitting approach (Wood et al. 2016) is provided by the \code{discrete==TRUE} method. In this case a method based on discretization of covariate values and C code level parallelization (controlled by the \code{nthreads} argument instead of the \code{cluster} argument) is used. This extends both the data set and model size usable.
An alternative fitting approach (Wood et al. 2016) is provided by the \code{discrete==TRUE} method. In this case a method based on discretization of covariate values and C code level parallelization (controlled by the \code{nthreads} argument instead of the \code{cluster} argument) is used. This extends both the data set and model size that are practical.
}
\usage{
......@@ -35,8 +36,7 @@ to the right hand side to specify that the linear predictor depends on smooth fu
\item{family}{
This is a family object specifying the distribution and link to use in
fitting etc. See \code{\link{glm}} and \code{\link{family}} for more
details. A negative binomial family is provided: see \code{\link{negbin}}, but
only the known theta case is supported by \code{bam}.
details. The extended families listed in \code{\link{family.mgcv}} can also be used.
}
\item{data}{ A data frame or list containing the model response variable and
......@@ -119,14 +119,15 @@ Reset to \code{4*p} if \code{chunk.size < 4*p} where \code{p} is the number of c
\item{rho}{An AR1 error model can be used for the residuals (based on dataframe order), of Gaussian-identity
link models. This is the AR1 correlation parameter. Standardized residuals (approximately
uncorrelated under correct model) returned in
\code{std.rsd} if non zero. }
\code{std.rsd} if non zero. Also usable with other models when \code{discrete=TRUE}, in which case the AR model
is applied to the working residuals and corresponds to a GEE approximation.}
\item{AR.start}{logical variable of same length as data, \code{TRUE} at first observation of an independent
section of AR1 correlation. Very first observation in data frame does not need this. If \code{NULL} then
there are no breaks in AR1 correlation.}
\item{discrete}{with \code{method="fREML"} it is possible to discretize covariates for storage and efficiency reasons.
If \code{discrete} is \code{TRUE}, a number or a vector of numbers for each smoother term, then discretization happens. If numbers are supplied they give the number of discretization bins. Experimental at present. }
If \code{discrete} is \code{TRUE}, a number or a vector of numbers for each smoother term, then discretization happens. If numbers are supplied they give the number of discretization bins.}
\item{cluster}{\code{bam} can compute the computationally dominant QR decomposition in parallel using \link[parallel]{parLapply}
from the \code{parallel} package, if it is supplied with a cluster on which to do this (a cluster here can be some cores of a
......@@ -189,7 +190,9 @@ can be used in the 1D case, and tensor product smooths (\code{te}) are typically
If \code{cluster} is provided as a cluster set up using \code{\link[parallel]{makeCluster}} (or \code{\link[parallel]{makeForkCluster}}) from the \code{parallel} package, then the rate limiting QR decomposition of the model matrix is performed in parallel using this cluster. Note that the speed ups are often not that great. On a multi-core machine it is usually best to set the cluster size to the number of physical cores, which is often less than what is reported by \code{\link[parallel]{detectCores}}. Using more than the number of physical cores can result in no speed up at all (or even a slow down). Note that a highly parallel BLAS may negate all advantage from using a cluster of cores. Computing in parallel of course requires more memory than computing in series. See examples.
When \code{discrete=TRUE} the covariate data are first discretized. Discretization takes place on a smooth by smooth basis, or in the case of tensor product smooths (or any smooth that can be represented as such, such as random effects), separately for each marginal smooth. The required spline bases are then evaluated at the discrete values, and stored, along with index vectors indicating which original observation they relate to. Fitting is by a version of performance oriented iteration/PQL using REML smoothing parameter selection on each iterative working model (as for the default method). The iteration is based on the derivatives of the REML score, without computing the score itself, allowing the expensive computations to be reduced to one parallel block Cholesky decomposition per iteration (plus two basic operations of equal cost, but easily parallelized). Unlike standard POI/PQL, only one step of the smoothing parameter update for the working model is taken at each step (rather than iterating to the optimal set of smoothing parameters for each working model). At each step a weighted model matrix crossproduct of the model matrix is required - this is efficiently computed from the pre-computed basis functions evaluated at the discretized covariate values. Efficient computation with tensor product terms means that some terms within a tensor product may be re-ordered for maximum efficiency. Parallel computation is controlled using the \code{nthreads} argument. For this method no cluster computation is used, and the \code{parallel} package is not required.
The extended families given in \code{\link{family.mgcv}} can also be used. The extra parameters of these are estimated by maximizing the penalized likelihood, rather than the restricted marginal likelihood as in \code{\link{gam}}. So estimates may differ slightly from those returned by \code{\link{gam}}. Estimation is accomplished by a Newton iteration to find the extra parameters (e.g. the theta parameter of the negative binomial or the degrees of freedom and scale of the scaled t) maximizing the log likelihood given the model coefficients at each iteration of the fitting procedure.
}
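A minimal sketch of discrete fitting with the AR1 working-residual model described above; the data are simulated with no real serial correlation, so the rho value here is purely illustrative:
library(mgcv)
set.seed(5)
dat <- gamSim(1, n = 10000, dist = "poisson", scale = 0.25)
b <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), family = poisson(),
         data = dat, discrete = TRUE, rho = 0.4, nthreads = 2)
summary(b)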
......@@ -197,7 +200,7 @@ When \code{discrete=TRUE} the covariate data are first discretized. Discretizati
\references{
Wood, S.N., Goude, Y. & Shaw S. (2015) Generalized additive models for large datasets. Journal of the Royal Statistical Society, Series C 64(1): 139-155.
Wood, S.N., Li, Z., Shaddick, G. & Augustin N.H. (2016) Generalized additive models for gigadata: modelling the UK black smoke network daily data. Journal of the American Statistical Association.
Wood, S.N., Li, Z., Shaddick, G. & Augustin N.H. (2017) Generalized additive models for gigadata: modelling the UK black smoke network daily data. Journal of the American Statistical Association.
}
......
......@@ -64,7 +64,7 @@ conclusions to be drawn from a model fit, however.
\references{
Wood, S.N. (2006) Generalized Additive Models: An Introduction with R. CRC.
Wood, S.N. (2017) Generalized Additive Models: An Introduction with R (2nd edition). CRC/Taylor & Francis.
\url{http://www.maths.bris.ac.uk/~sw15190/}
}
......
......@@ -3,11 +3,12 @@
%- Also NEED an `\alias' for EACH other topic documented here.
\title{Additive Cox Proportional Hazard Model}
\description{The \code{cox.ph} family implements the Cox Proportional Hazards model with Peto's
correction for ties, and estimation by penalized partial likelihood maximization, for use with
\code{\link{gam}}. In the model formula, event time is the response. The \code{weights} vector provides
the censoring information (0 for censoring, 1 for event). Deals with the case in which each subject has
correction for ties, optional stratification, and estimation by penalized partial likelihood maximization, for use with
\code{\link{gam}}. In the model formula, event time is the response. Under stratification the response has two columns: time
and a numeric index for stratum. The \code{weights} vector provides
the censoring information (0 for censoring, 1 for event). \code{cox.ph} deals with the case in which each subject has
one event/censoring time and one row of covariate values. When each subject has several time dependent
covariates see \code{\link{cox.pht}}.
covariates see \code{\link{cox.pht}}.
See example below for conditional logistic regression.
}
......@@ -26,7 +27,9 @@ cox.ph(link="identity")
\details{Used with \code{\link{gam}} to fit Cox Proportional Hazards models to survival data. The model formula will
have event/censoring times on the left hand side and the linear predictor specification on the right hand side. Censoring
information is provided by the \code{weights} argument to \code{gam}, with 1 indicating an event and 0 indicating
censoring.
censoring.
Stratification is possible, allowing for different baseline hazards in different strata. In that case the response has two columns: the first is event/censoring time and the second is a numeric stratum index. See below for an example.
Prediction from the fitted model object (using the \code{predict} method) with \code{type="response"} will predict on the
survivor function scale. See example code below for extracting the cumulative baseline hazard/survival directly.
......@@ -41,7 +44,7 @@ estimation of residuals, the cumulative baseline hazard, survival function and a
The percentage deviance explained reported for Cox PH models is based on the sum of squares of the deviance residuals, as the model deviance, and the sum of squares of the deviance residuals when the covariate effects are set to zero, as the null deviance. The same baseline hazard estimate is used for both.
This family deals efficiently with the case in which each subject has one event/censoring time and one row of covariate values. For studies in which there are multiple time varying covariate measures for each subject then the equivalent Poisson model should be fitted to suitable pseudodata using \code{bam(...,discrete=TRUE)}.
This family deals efficiently with the case in which each subject has one event/censoring time and one row of covariate values. For studies in which there are multiple time varying covariate measures for each subject then the equivalent Poisson model should be fitted to suitable pseudodata using \code{bam(...,discrete=TRUE)}. See \code{\link{cox.pht}}.
}
\references{
......@@ -56,6 +59,7 @@ Journal of the American Statistical Association 111, 1548-1575
\url{http://dx.doi.org/10.1080/01621459.2016.1180986}
}
\seealso{\code{\link{cox.pht}}}
\examples{
library(mgcv)
......@@ -93,6 +97,29 @@ lines(b$family$data$tr,exp(-b$family$data$h + 2*b$family$data$q^.5),col=2)
lines(b$family$data$tr,exp(-b$family$data$h - 2*b$family$data$q^.5),col=2)
lines(b$family$data$tr,exp(-b$family$data$km),lty=2) ## Kaplan Meier
## stratification example, with 2 randomly allocated strata
## so that results should be similar to previous....
col1$strata <- sample(1:2,nrow(col1),replace=TRUE)
bs <- gam(cbind(time,strata)~s(age,by=sex)+sex+s(nodes)+perfor+rx+obstruct+adhere,
family=cox.ph(),data=col1,weights=status)
plot(bs,pages=1,all.terms=TRUE) ## plot effects
## baseline survival plots by strata...
for (i in 1:2) { ## loop over strata
## create index picking out elements of stored hazard info for this stratum...
ind <- which(bs$family$data$tr.strat == i)
if (i==1) plot(bs$family$data$tr[ind],exp(-bs$family$data$h[ind]),type="l",ylim=c(0,1),
xlab="time",ylab="survival",lwd=2,col=i) else
lines(bs$family$data$tr[ind],exp(-bs$family$data$h[ind]),lwd=2,col=i)
lines(bs$family$data$tr[ind],exp(-bs$family$data$h[ind] +
2*bs$family$data$q[ind]^.5),lty=2,col=i) ## upper ci
lines(bs$family$data$tr[ind],exp(-bs$family$data$h[ind] -
2*bs$family$data$q[ind]^.5),lty=2,col=i) ## lower ci
lines(bs$family$data$tr[ind],exp(-bs$family$data$km[ind]),col=i) ## KM
}
## Simple simulated known truth example...
ph.weibull.sim <- function(eta,gamma=1,h0=.01,t1=100) {
lambda <- h0*exp(eta)
......@@ -120,12 +147,17 @@ b <- gam(t~s(x0)+s(x1)+s(x2,k=15)+s(x3),family=cox.ph,weights=d,data=surv)
plot(b,pages=1)
## conditional logistic regression models are often estimated using the
## cox.ph partial likelihood. Following compares to 'clogit'...
## cox proportional hazards partial likelihood with a stratum for each
## case-control group. A dummy vector of times is created (all equal).
## The following compares to 'clogit' for a simple case. Note that
## the gam log likelihood is not exact if there is more than one case
## per stratum, corresponding to clogit's approximate method.
library(survival);library(mgcv)
infert$dumt <- rep(1,nrow(infert))
mg <- gam(dumt ~ spontaneous + induced + factor(stratum), data=infert,
mg <- gam(cbind(dumt,stratum) ~ spontaneous + induced, data=infert,
family=cox.ph,weights=case)
ms <- clogit(case ~ spontaneous + induced + strata(stratum), data=infert)
ms <- clogit(case ~ spontaneous + induced + strata(stratum), data=infert,
method="approximate")
summary(mg)$p.table[1:2,]; ms
}
\keyword{models} \keyword{regression}%-- one or more ..
......
......@@ -74,7 +74,7 @@ Generalized Additive Mixed Models. Biometrics 62(4):1025-1036
or
Wood S.N. (2006) Generalized Additive Models: An Introduction with R. Chapman
Wood S.N. (2017) Generalized Additive Models: An Introduction with R (2nd edition). Chapman
and Hall/CRC Press.